]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/nfsclient/nfs_clstate.c
nfscl: Add hash lists for the NFSv4 opens
[FreeBSD/FreeBSD.git] / sys / fs / nfsclient / nfs_clstate.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2009 Rick Macklem, University of Guelph
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 /*
34  * These functions implement the client side state handling for NFSv4.
35  * NFSv4 state handling:
36  * - A lockowner is used to determine lock contention, so it
37  *   corresponds directly to a Posix pid. (1 to 1 mapping)
38  * - The correct granularity of an OpenOwner is not nearly so
39  *   obvious. An OpenOwner does the following:
40  *   - provides a serial sequencing of Open/Close/Lock-with-new-lockowner
41  *   - is used to check for Open/Share contention (not applicable to
42  *     this client, since all Opens are Deny_None)
 *   As such, I considered both extremes.
44  *   1 OpenOwner per ClientID - Simple to manage, but fully serializes
45  *   all Open, Close and Lock (with a new lockowner) Ops.
46  *   1 OpenOwner for each Open - This one results in an OpenConfirm for
47  *   every Open, for most servers.
 *   So, I chose to use the same mapping as I did for LockOwners.
49  *   The main concern here is that you can end up with multiple Opens
50  *   for the same File Handle, but on different OpenOwners (opens
51  *   inherited from parents, grandparents...) and you do not know
52  *   which of these the vnodeop close applies to. This is handled by
53  *   delaying the Close Op(s) until all of the Opens have been closed.
54  *   (It is not yet obvious if this is the correct granularity.)
55  * - How the code handles serialization:
56  *   - For the ClientId, it uses an exclusive lock while getting its
57  *     SetClientId and during recovery. Otherwise, it uses a shared
58  *     lock via a reference count.
59  *   - For the rest of the data structures, it uses an SMP mutex
60  *     (once the nfs client is SMP safe) and doesn't sleep while
61  *     manipulating the linked lists.
62  *   - The serialization of Open/Close/Lock/LockU falls out in the
63  *     "wash", since OpenOwners and LockOwners are both mapped from
64  *     Posix pid. In other words, there is only one Posix pid using
65  *     any given owner, so that owner is serialized. (If you change
66  *     the granularity of the OpenOwner, then code must be added to
67  *     serialize Ops on the OpenOwner.)
68  * - When to get rid of OpenOwners and LockOwners.
69  *   - The function nfscl_cleanup_common() is executed after a process exits.
70  *     It goes through the client list looking for all Open and Lock Owners.
71  *     When one is found, it is marked "defunct" or in the case of
72  *     an OpenOwner without any Opens, freed.
73  *     The renew thread scans for defunct Owners and gets rid of them,
74  *     if it can. The LockOwners will also be deleted when the
75  *     associated Open is closed.
76  *   - If the LockU or Close Op(s) fail during close in a way
77  *     that could be recovered upon retry, they are relinked to the
78  *     ClientId's defunct open list and retried by the renew thread
79  *     until they succeed or an unmount/recovery occurs.
80  *     (Since we are done with them, they do not need to be recovered.)
81  */
82
83 #include <fs/nfs/nfsport.h>
84
/*
 * Global variables
 * The "extern" objects are only declared here; the remaining variables
 * are defined by this file.
 */
extern struct nfsstatsv1 nfsstatsv1;
extern struct nfsreqhead nfsd_reqq;
extern u_int32_t newnfs_false, newnfs_true;
extern int nfscl_debuglevel;
extern int nfscl_enablecallb;
extern int nfs_numnfscbd;
/*
 * NOTE(review): these two macros presumably declare the request spin lock
 * and the client state mutex that NFSLOCKCLSTATE()/NFSUNLOCKCLSTATE()
 * operate on; their definitions are not in this file -- confirm.
 */
NFSREQSPINLOCK;
NFSCLSTATEMUTEX;
int nfscl_inited = 0;
struct nfsclhead nfsclhead;     /* Head of clientid list */
/* Initialized from the NFSCLDELEGHIGHWATER/NFSCLLAYOUTHIGHWATER constants. */
int nfscl_deleghighwater = NFSCLDELEGHIGHWATER;
int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER;

/* Incremented by nfscl_deleg() each time a delegation is added to a client. */
static int nfscl_delegcnt = 0;
static int nfscl_layoutcnt = 0;
/*
 * Forward declarations of the file-local (static) functions, so that they
 * can be used before the point in the file where they are defined.
 */
static int nfscl_getopen(struct nfsclownerhead *, u_int8_t *, int, u_int8_t *,
    u_int8_t *, u_int32_t, struct nfscllockowner **, struct nfsclopen **);
static bool nfscl_checkown(struct nfsclowner *, struct nfsclopen *, uint8_t *,
    uint8_t *, struct nfscllockowner **, struct nfsclopen **,
    struct nfsclopen **);
static void nfscl_clrelease(struct nfsclclient *);
static void nfscl_cleanclient(struct nfsclclient *);
static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *,
    struct ucred *, NFSPROC_T *);
static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *,
    struct nfsmount *, struct ucred *, NFSPROC_T *);
static void nfscl_recover(struct nfsclclient *, bool *, struct ucred *,
    NFSPROC_T *);
static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *,
    struct nfscllock *, int);
static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **,
    struct nfscllock **, int);
static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *);
static u_int32_t nfscl_nextcbident(void);
static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **);
static struct nfsclclient *nfscl_getclnt(u_int32_t);
static struct nfsclclient *nfscl_getclntsess(uint8_t *);
static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *,
    int);
static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *,
    int, struct nfsclrecalllayout **);
static void nfscl_reldevinfo_locked(struct nfscldevinfo *);
static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *,
    int);
static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *);
static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *,
    u_int8_t *, struct nfscllock **);
static void nfscl_freealllocks(struct nfscllockownerhead *, int);
static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int,
    struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **);
static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *,
    struct nfsclowner **, struct nfsclowner **, struct nfsclopen **,
    struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *);
static int nfscl_moveopen(vnode_t , struct nfsclclient *,
    struct nfsmount *, struct nfsclopen *, struct nfsclowner *,
    struct nfscldeleg *, struct ucred *, NFSPROC_T *);
static void nfscl_totalrecall(struct nfsclclient *);
static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *,
    struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *);
static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int,
    u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int,
    struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *);
static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *,
    int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short,
    struct ucred *, NFSPROC_T *);
static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t,
    struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *);
static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *,
    bool);
static int nfscl_errmap(struct nfsrv_descript *, u_int32_t);
static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *);
static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *,
    struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int,
    vnode_t *);
static void nfscl_freeopenowner(struct nfsclowner *, int);
static void nfscl_cleandeleg(struct nfscldeleg *);
static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *,
    struct nfsmount *, NFSPROC_T *);
static void nfscl_emptylockowner(struct nfscllockowner *,
    struct nfscllockownerfhhead *);
static void nfscl_mergeflayouts(struct nfsclflayouthead *,
    struct nfsclflayouthead *);
static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t,
    uint64_t, uint32_t, uint32_t, uint32_t, char *, struct nfsclrecalllayout *);
static int nfscl_seq(uint32_t, uint32_t);
static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *,
    struct ucred *, NFSPROC_T *);
static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *,
    struct ucred *, NFSPROC_T *);
177
/*
 * Error tables for the callback operations. Each table is a list of
 * error values that ends with a 0 entry.
 * NOTE(review): presumably consumed by nfscl_errmap(), whose body is not
 * visible here, with the first entry acting as the default error -- confirm.
 */

/* Table with no errors listed. */
static short nfscberr_null[] = {
        0,
        0,
};

/* Errors listed for the getattr callback. */
static short nfscberr_getattr[] = {
        NFSERR_RESOURCE,
        NFSERR_BADHANDLE,
        NFSERR_BADXDR,
        NFSERR_RESOURCE,
        NFSERR_SERVERFAULT,
        0,
};

/* Errors listed for the recall callback. */
static short nfscberr_recall[] = {
        NFSERR_RESOURCE,
        NFSERR_BADHANDLE,
        NFSERR_BADSTATEID,
        NFSERR_BADXDR,
        NFSERR_RESOURCE,
        NFSERR_SERVERFAULT,
        0,
};

/*
 * Table of the error lists above: entries 0 through 2 use the empty list,
 * entry 3 the getattr list and entry 4 the recall list.
 * NOTE(review): the index is presumably the callback operation number --
 * confirm against nfscl_errmap().
 */
static short *nfscl_cberrmap[] = {
        nfscberr_null,
        nfscberr_null,
        nfscberr_null,
        nfscberr_getattr,
        nfscberr_recall
};
209
/*
 * Yield the address family for a client: AF_INET6 when NFSCLFLAGS_AFINET6
 * is set in the client's nfsc_flags, otherwise AF_INET.
 */
#define NETFAMILY(clp)                                                  \
        ((((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) != 0) ?              \
            AF_INET6 : AF_INET)
212
213 /*
214  * Called for an open operation.
215  * If the nfhp argument is NULL, just get an openowner.
216  */
217 int
218 nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
219     struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp,
220     struct nfsclopen **opp, int *newonep, int *retp, int lockit)
221 {
222         struct nfsclclient *clp;
223         struct nfsclowner *owp, *nowp;
224         struct nfsclopen *op = NULL, *nop = NULL;
225         struct nfscldeleg *dp;
226         struct nfsclownerhead *ohp;
227         u_int8_t own[NFSV4CL_LOCKNAMELEN];
228         int ret;
229
230         if (newonep != NULL)
231                 *newonep = 0;
232         if (opp != NULL)
233                 *opp = NULL;
234         if (owpp != NULL)
235                 *owpp = NULL;
236
237         /*
238          * Might need one or both of these, so MALLOC them now, to
239          * avoid a tsleep() in MALLOC later.
240          */
241         nowp = malloc(sizeof (struct nfsclowner),
242             M_NFSCLOWNER, M_WAITOK);
243         if (nfhp != NULL) {
244             nop = malloc(sizeof (struct nfsclopen) +
245                 fhlen - 1, M_NFSCLOPEN, M_WAITOK);
246             nop->nfso_hash.le_prev = NULL;
247         }
248         ret = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
249         if (ret != 0) {
250                 free(nowp, M_NFSCLOWNER);
251                 if (nop != NULL)
252                         free(nop, M_NFSCLOPEN);
253                 return (ret);
254         }
255
256         /*
257          * Get the Open iff it already exists.
258          * If none found, add the new one or return error, depending upon
259          * "create".
260          */
261         NFSLOCKCLSTATE();
262         dp = NULL;
263         /* First check the delegation list */
264         if (nfhp != NULL && usedeleg) {
265                 LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
266                         if (dp->nfsdl_fhlen == fhlen &&
267                             !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
268                                 if (!(amode & NFSV4OPEN_ACCESSWRITE) ||
269                                     (dp->nfsdl_flags & NFSCLDL_WRITE))
270                                         break;
271                                 dp = NULL;
272                                 break;
273                         }
274                 }
275         }
276
277         /* For NFSv4.1/4.2 and this option, use a single open_owner. */
278         if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
279                 nfscl_filllockowner(NULL, own, F_POSIX);
280         else
281                 nfscl_filllockowner(p->td_proc, own, F_POSIX);
282         if (dp != NULL)
283                 ohp = &dp->nfsdl_owner;
284         else
285                 ohp = &clp->nfsc_owner;
286         /* Now, search for an openowner */
287         LIST_FOREACH(owp, ohp, nfsow_list) {
288                 if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN))
289                         break;
290         }
291
292         /*
293          * Create a new open, as required.
294          */
295         nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen,
296             cred, newonep);
297
298         /*
299          * Now, check the mode on the open and return the appropriate
300          * value.
301          */
302         if (retp != NULL) {
303                 if (nfhp != NULL && dp != NULL && nop == NULL)
304                         /* new local open on delegation */
305                         *retp = NFSCLOPEN_SETCRED;
306                 else
307                         *retp = NFSCLOPEN_OK;
308         }
309         if (op != NULL && (amode & ~(op->nfso_mode))) {
310                 op->nfso_mode |= amode;
311                 if (retp != NULL && dp == NULL)
312                         *retp = NFSCLOPEN_DOOPEN;
313         }
314
315         /*
316          * Serialize modifications to the open owner for multiple threads
317          * within the same process using a read/write sleep lock.
318          * For NFSv4.1 and a single OpenOwner, allow concurrent open operations
319          * by acquiring a shared lock.  The close operations still use an
320          * exclusive lock for this case.
321          */
322         if (lockit != 0) {
323                 if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount))) {
324                         /*
325                          * Get a shared lock on the OpenOwner, but first
326                          * wait for any pending exclusive lock, so that the
327                          * exclusive locker gets priority.
328                          */
329                         nfsv4_lock(&owp->nfsow_rwlock, 0, NULL,
330                             NFSCLSTATEMUTEXPTR, NULL);
331                         nfsv4_getref(&owp->nfsow_rwlock, NULL,
332                             NFSCLSTATEMUTEXPTR, NULL);
333                 } else
334                         nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
335         }
336         NFSUNLOCKCLSTATE();
337         if (nowp != NULL)
338                 free(nowp, M_NFSCLOWNER);
339         if (nop != NULL)
340                 free(nop, M_NFSCLOPEN);
341         if (owpp != NULL)
342                 *owpp = owp;
343         if (opp != NULL)
344                 *opp = op;
345         return (0);
346 }
347
348 /*
349  * Create a new open, as required.
350  */
351 static void
352 nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp,
353     struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp,
354     struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen,
355     struct ucred *cred, int *newonep)
356 {
357         struct nfsclowner *owp = *owpp, *nowp;
358         struct nfsclopen *op, *nop;
359
360         if (nowpp != NULL)
361                 nowp = *nowpp;
362         else
363                 nowp = NULL;
364         if (nopp != NULL)
365                 nop = *nopp;
366         else
367                 nop = NULL;
368         if (owp == NULL && nowp != NULL) {
369                 NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
370                 LIST_INIT(&nowp->nfsow_open);
371                 nowp->nfsow_clp = clp;
372                 nowp->nfsow_seqid = 0;
373                 nowp->nfsow_defunct = 0;
374                 nfscl_lockinit(&nowp->nfsow_rwlock);
375                 if (dp != NULL) {
376                         nfsstatsv1.cllocalopenowners++;
377                         LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list);
378                 } else {
379                         nfsstatsv1.clopenowners++;
380                         LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list);
381                 }
382                 owp = *owpp = nowp;
383                 *nowpp = NULL;
384                 if (newonep != NULL)
385                         *newonep = 1;
386         }
387
388          /* If an fhp has been specified, create an Open as well. */
389         if (fhp != NULL) {
390                 /* and look for the correct open, based upon FH */
391                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
392                         if (op->nfso_fhlen == fhlen &&
393                             !NFSBCMP(op->nfso_fh, fhp, fhlen))
394                                 break;
395                 }
396                 if (op == NULL && nop != NULL) {
397                         nop->nfso_own = owp;
398                         nop->nfso_mode = 0;
399                         nop->nfso_opencnt = 0;
400                         nop->nfso_posixlock = 1;
401                         nop->nfso_fhlen = fhlen;
402                         NFSBCOPY(fhp, nop->nfso_fh, fhlen);
403                         LIST_INIT(&nop->nfso_lock);
404                         nop->nfso_stateid.seqid = 0;
405                         nop->nfso_stateid.other[0] = 0;
406                         nop->nfso_stateid.other[1] = 0;
407                         nop->nfso_stateid.other[2] = 0;
408                         KASSERT(cred != NULL, ("%s: cred NULL\n", __func__));
409                         newnfs_copyincred(cred, &nop->nfso_cred);
410                         if (dp != NULL) {
411                                 TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
412                                 TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
413                                     nfsdl_list);
414                                 dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
415                                 nfsstatsv1.cllocalopens++;
416                         } else {
417                                 LIST_INSERT_HEAD(NFSCLOPENHASH(clp, fhp, fhlen),
418                                     nop, nfso_hash);
419                                 nfsstatsv1.clopens++;
420                         }
421                         LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list);
422                         *opp = nop;
423                         *nopp = NULL;
424                         if (newonep != NULL)
425                                 *newonep = 1;
426                 } else {
427                         *opp = op;
428                 }
429         }
430 }
431
432 /*
433  * Called to find/add a delegation to a client.
434  */
435 int
436 nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
437     int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp)
438 {
439         struct nfscldeleg *dp = *dpp, *tdp;
440
441         /*
442          * First, if we have received a Read delegation for a file on a
443          * read/write file system, just return it, because they aren't
444          * useful, imho.
445          */
446         if (mp != NULL && dp != NULL && !NFSMNT_RDONLY(mp) &&
447             (dp->nfsdl_flags & NFSCLDL_READ)) {
448                 (void) nfscl_trydelegreturn(dp, cred, VFSTONFS(mp), p);
449                 free(dp, M_NFSCLDELEG);
450                 *dpp = NULL;
451                 return (0);
452         }
453
454         /* Look for the correct deleg, based upon FH */
455         NFSLOCKCLSTATE();
456         tdp = nfscl_finddeleg(clp, nfhp, fhlen);
457         if (tdp == NULL) {
458                 if (dp == NULL) {
459                         NFSUNLOCKCLSTATE();
460                         return (NFSERR_BADSTATEID);
461                 }
462                 *dpp = NULL;
463                 TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
464                 LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp,
465                     nfsdl_hash);
466                 dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
467                 nfsstatsv1.cldelegates++;
468                 nfscl_delegcnt++;
469         } else {
470                 /*
471                  * Delegation already exists, what do we do if a new one??
472                  */
473                 if (dp != NULL) {
474                         printf("Deleg already exists!\n");
475                         free(dp, M_NFSCLDELEG);
476                         *dpp = NULL;
477                 } else {
478                         *dpp = tdp;
479                 }
480         }
481         NFSUNLOCKCLSTATE();
482         return (0);
483 }
484
485 /*
486  * Find a delegation for this file handle. Return NULL upon failure.
487  */
488 static struct nfscldeleg *
489 nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
490 {
491         struct nfscldeleg *dp;
492
493         LIST_FOREACH(dp, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) {
494             if (dp->nfsdl_fhlen == fhlen &&
495                 !NFSBCMP(dp->nfsdl_fh, fhp, fhlen))
496                 break;
497         }
498         return (dp);
499 }
500
501 /*
502  * Get a stateid for an I/O operation. First, look for an open and iff
503  * found, return either a lockowner stateid or the open stateid.
504  * If no Open is found, just return error and the special stateid of all zeros.
505  */
506 int
507 nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
508     int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp,
509     void **lckpp)
510 {
511         struct nfsclclient *clp;
512         struct nfsclowner *owp;
513         struct nfsclopen *op = NULL, *top;
514         struct nfscllockowner *lp;
515         struct nfscldeleg *dp;
516         struct nfsnode *np;
517         struct nfsmount *nmp;
518         u_int8_t own[NFSV4CL_LOCKNAMELEN];
519         int error;
520         bool done;
521
522         *lckpp = NULL;
523         /*
524          * Initially, just set the special stateid of all zeros.
525          * (Don't do this for a DS, since the special stateid can't be used.)
526          */
527         if (fords == 0) {
528                 stateidp->seqid = 0;
529                 stateidp->other[0] = 0;
530                 stateidp->other[1] = 0;
531                 stateidp->other[2] = 0;
532         }
533         if (vnode_vtype(vp) != VREG)
534                 return (EISDIR);
535         np = VTONFS(vp);
536         nmp = VFSTONFS(vp->v_mount);
537         NFSLOCKCLSTATE();
538         clp = nfscl_findcl(nmp);
539         if (clp == NULL) {
540                 NFSUNLOCKCLSTATE();
541                 return (EACCES);
542         }
543
544         /*
545          * Wait for recovery to complete.
546          */
547         while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG))
548                 (void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR,
549                     PZERO, "nfsrecvr", NULL);
550
551         /*
552          * First, look for a delegation.
553          */
554         LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
555                 if (dp->nfsdl_fhlen == fhlen &&
556                     !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
557                         if (!(mode & NFSV4OPEN_ACCESSWRITE) ||
558                             (dp->nfsdl_flags & NFSCLDL_WRITE)) {
559                                 stateidp->seqid = dp->nfsdl_stateid.seqid;
560                                 stateidp->other[0] = dp->nfsdl_stateid.other[0];
561                                 stateidp->other[1] = dp->nfsdl_stateid.other[1];
562                                 stateidp->other[2] = dp->nfsdl_stateid.other[2];
563                                 if (!(np->n_flag & NDELEGRECALL)) {
564                                         TAILQ_REMOVE(&clp->nfsc_deleg, dp,
565                                             nfsdl_list);
566                                         TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
567                                             nfsdl_list);
568                                         dp->nfsdl_timestamp = NFSD_MONOSEC +
569                                             120;
570                                         dp->nfsdl_rwlock.nfslock_usecnt++;
571                                         *lckpp = (void *)&dp->nfsdl_rwlock;
572                                 }
573                                 NFSUNLOCKCLSTATE();
574                                 return (0);
575                         }
576                         break;
577                 }
578         }
579
580         if (p != NULL) {
581                 /*
582                  * If p != NULL, we want to search the parentage tree
583                  * for a matching OpenOwner and use that.
584                  */
585                 if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
586                         nfscl_filllockowner(NULL, own, F_POSIX);
587                 else
588                         nfscl_filllockowner(p->td_proc, own, F_POSIX);
589                 lp = NULL;
590                 error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, own, own,
591                     mode, &lp, &op);
592                 if (error == 0 && lp != NULL && fords == 0) {
593                         /* Don't return a lock stateid for a DS. */
594                         stateidp->seqid =
595                             lp->nfsl_stateid.seqid;
596                         stateidp->other[0] =
597                             lp->nfsl_stateid.other[0];
598                         stateidp->other[1] =
599                             lp->nfsl_stateid.other[1];
600                         stateidp->other[2] =
601                             lp->nfsl_stateid.other[2];
602                         NFSUNLOCKCLSTATE();
603                         return (0);
604                 }
605         }
606         if (op == NULL) {
607                 /* If not found, just look for any OpenOwner that will work. */
608                 top = NULL;
609                 done = false;
610                 LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
611                         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
612                                 if (op->nfso_fhlen == fhlen &&
613                                     !NFSBCMP(op->nfso_fh, nfhp, fhlen)) {
614                                         if (top == NULL && (op->nfso_mode &
615                                             NFSV4OPEN_ACCESSWRITE) != 0 &&
616                                             (mode & NFSV4OPEN_ACCESSREAD) != 0)
617                                                 top = op;
618                                         if ((mode & op->nfso_mode) == mode) {
619                                                 done = true;
620                                                 break;
621                                         }
622                                 }
623                         }
624                         if (done)
625                                 break;
626                 }
627                 if (!done) {
628                         NFSCL_DEBUG(2, "openmode top=%p\n", top);
629                         if (top == NULL || NFSHASOPENMODE(nmp)) {
630                                 NFSUNLOCKCLSTATE();
631                                 return (ENOENT);
632                         } else
633                                 op = top;
634                 }
635                 /*
636                  * For read aheads or write behinds, use the open cred.
637                  * A read ahead or write behind is indicated by p == NULL.
638                  */
639                 if (p == NULL)
640                         newnfs_copycred(&op->nfso_cred, cred);
641         }
642
643         /*
644          * No lock stateid, so return the open stateid.
645          */
646         stateidp->seqid = op->nfso_stateid.seqid;
647         stateidp->other[0] = op->nfso_stateid.other[0];
648         stateidp->other[1] = op->nfso_stateid.other[1];
649         stateidp->other[2] = op->nfso_stateid.other[2];
650         NFSUNLOCKCLSTATE();
651         return (0);
652 }
653
654 /*
655  * Search for a matching file, mode and, optionally, lockowner.
656  */
657 static int
658 nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen,
659     u_int8_t *openown, u_int8_t *lockown, u_int32_t mode,
660     struct nfscllockowner **lpp, struct nfsclopen **opp)
661 {
662         struct nfsclowner *owp;
663         struct nfsclopen *op, *rop, *rop2;
664         bool keep_looping;
665
666         if (lpp != NULL)
667                 *lpp = NULL;
668         /*
669          * rop will be set to the open to be returned. There are three
670          * variants of this, all for an open of the correct file:
671          * 1 - A match of lockown.
672          * 2 - A match of the openown, when no lockown match exists.
673          * 3 - A match for any open, if no openown or lockown match exists.
674          * Looking for #2 over #3 probably isn't necessary, but since
675          * RFC3530 is vague w.r.t. the relationship between openowners and
676          * lockowners, I think this is the safer way to go.
677          */
678         rop = NULL;
679         rop2 = NULL;
680         keep_looping = true;
681         /* Search the client list */
682         LIST_FOREACH(owp, ohp, nfsow_list) {
683                 /* and look for the correct open */
684                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
685                         if (op->nfso_fhlen == fhlen &&
686                             !NFSBCMP(op->nfso_fh, nfhp, fhlen)
687                             && (op->nfso_mode & mode) == mode)
688                                 keep_looping = nfscl_checkown(owp, op, openown,
689                                     lockown, lpp, &rop, &rop2);
690                         if (!keep_looping)
691                                 break;
692                 }
693                 if (!keep_looping)
694                         break;
695         }
696         if (rop == NULL)
697                 rop = rop2;
698         if (rop == NULL)
699                 return (EBADF);
700         *opp = rop;
701         return (0);
702 }
703
704 /* Check for an owner match. */
705 static bool
706 nfscl_checkown(struct nfsclowner *owp, struct nfsclopen *op, uint8_t *openown,
707     uint8_t *lockown, struct nfscllockowner **lpp, struct nfsclopen **ropp,
708     struct nfsclopen **ropp2)
709 {
710         struct nfscllockowner *lp;
711         bool keep_looping;
712
713         keep_looping = true;
714         if (lpp != NULL) {
715                 /* Now look for a matching lockowner. */
716                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
717                         if (!NFSBCMP(lp->nfsl_owner, lockown,
718                             NFSV4CL_LOCKNAMELEN)) {
719                                 *lpp = lp;
720                                 *ropp = op;
721                                 return (false);
722                         }
723                 }
724         }
725         if (*ropp == NULL && !NFSBCMP(owp->nfsow_owner, openown,
726             NFSV4CL_LOCKNAMELEN)) {
727                 *ropp = op;
728                 if (lpp == NULL)
729                         keep_looping = false;
730         }
731         if (*ropp2 == NULL)
732                 *ropp2 = op;
733         return (keep_looping);
734 }
735
736 /*
737  * Release use of an open owner. Called when open operations are done
738  * with the open owner.
739  */
740 void
741 nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp,
742     __unused int error, __unused int candelete, int unlocked)
743 {
744
745         if (owp == NULL)
746                 return;
747         NFSLOCKCLSTATE();
748         if (unlocked == 0) {
749                 if (NFSHASONEOPENOWN(nmp))
750                         nfsv4_relref(&owp->nfsow_rwlock);
751                 else
752                         nfscl_lockunlock(&owp->nfsow_rwlock);
753         }
754         nfscl_clrelease(owp->nfsow_clp);
755         NFSUNLOCKCLSTATE();
756 }
757
758 /*
759  * Release use of an open structure under an open owner.
760  */
761 void
762 nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
763     int candelete)
764 {
765         struct nfsclclient *clp;
766         struct nfsclowner *owp;
767
768         if (op == NULL)
769                 return;
770         NFSLOCKCLSTATE();
771         owp = op->nfso_own;
772         if (NFSHASONEOPENOWN(nmp))
773                 nfsv4_relref(&owp->nfsow_rwlock);
774         else
775                 nfscl_lockunlock(&owp->nfsow_rwlock);
776         clp = owp->nfsow_clp;
777         if (error && candelete && op->nfso_opencnt == 0)
778                 nfscl_freeopen(op, 0);
779         nfscl_clrelease(clp);
780         NFSUNLOCKCLSTATE();
781 }
782
783 /*
784  * Called to get a clientid structure. It will optionally lock the
785  * client data structures to do the SetClientId/SetClientId_confirm,
786  * but will release that lock and return the clientid with a reference
787  * count on it.
788  * If the "cred" argument is NULL, a new clientid should not be created.
789  * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot
790  * be done.
791  * The start_renewthread argument tells nfscl_getcl() to start a renew
792  * thread if this creates a new clp.
793  * It always clpp with a reference count on it, unless returning an error.
794  */
795 int
796 nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
797     int start_renewthread, struct nfsclclient **clpp)
798 {
799         struct nfsclclient *clp;
800         struct nfsclclient *newclp = NULL;
801         struct nfsmount *nmp;
802         char uuid[HOSTUUIDLEN];
803         int igotlock = 0, error, trystalecnt, clidinusedelay, i;
804         u_int16_t idlen = 0;
805
806         nmp = VFSTONFS(mp);
807         if (cred != NULL) {
808                 getcredhostuuid(cred, uuid, sizeof uuid);
809                 idlen = strlen(uuid);
810                 if (idlen > 0)
811                         idlen += sizeof (u_int64_t);
812                 else
813                         idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */
814                 newclp = malloc(
815                     sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT,
816                     M_WAITOK | M_ZERO);
817         }
818         NFSLOCKCLSTATE();
819         /*
820          * If a forced dismount is already in progress, don't
821          * allocate a new clientid and get out now. For the case where
822          * clp != NULL, this is a harmless optimization.
823          */
824         if (NFSCL_FORCEDISM(mp)) {
825                 NFSUNLOCKCLSTATE();
826                 if (newclp != NULL)
827                         free(newclp, M_NFSCLCLIENT);
828                 return (EBADF);
829         }
830         clp = nmp->nm_clp;
831         if (clp == NULL) {
832                 if (newclp == NULL) {
833                         NFSUNLOCKCLSTATE();
834                         return (EACCES);
835                 }
836                 clp = newclp;
837                 clp->nfsc_idlen = idlen;
838                 LIST_INIT(&clp->nfsc_owner);
839                 TAILQ_INIT(&clp->nfsc_deleg);
840                 TAILQ_INIT(&clp->nfsc_layout);
841                 LIST_INIT(&clp->nfsc_devinfo);
842                 for (i = 0; i < NFSCLDELEGHASHSIZE; i++)
843                         LIST_INIT(&clp->nfsc_deleghash[i]);
844                 for (i = 0; i < NFSCLOPENHASHSIZE; i++)
845                         LIST_INIT(&clp->nfsc_openhash[i]);
846                 for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
847                         LIST_INIT(&clp->nfsc_layouthash[i]);
848                 clp->nfsc_flags = NFSCLFLAGS_INITED;
849                 clp->nfsc_clientidrev = 1;
850                 clp->nfsc_cbident = nfscl_nextcbident();
851                 nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id,
852                     clp->nfsc_idlen);
853                 LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list);
854                 nmp->nm_clp = clp;
855                 clp->nfsc_nmp = nmp;
856                 NFSUNLOCKCLSTATE();
857                 if (start_renewthread != 0)
858                         nfscl_start_renewthread(clp);
859         } else {
860                 NFSUNLOCKCLSTATE();
861                 if (newclp != NULL)
862                         free(newclp, M_NFSCLCLIENT);
863         }
864         NFSLOCKCLSTATE();
865         while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock &&
866             !NFSCL_FORCEDISM(mp))
867                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
868                     NFSCLSTATEMUTEXPTR, mp);
869         if (igotlock == 0) {
870                 /*
871                  * Call nfsv4_lock() with "iwantlock == 0" so that it will
872                  * wait for a pending exclusive lock request.  This gives the
873                  * exclusive lock request priority over this shared lock
874                  * request.
875                  * An exclusive lock on nfsc_lock is used mainly for server
876                  * crash recoveries.
877                  */
878                 nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR, mp);
879                 nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
880         }
881         if (igotlock == 0 && NFSCL_FORCEDISM(mp)) {
882                 /*
883                  * Both nfsv4_lock() and nfsv4_getref() know to check
884                  * for NFSCL_FORCEDISM() and return without sleeping to
885                  * wait for the exclusive lock to be released, since it
886                  * might be held by nfscl_umount() and we need to get out
887                  * now for that case and not wait until nfscl_umount()
888                  * releases it.
889                  */
890                 NFSUNLOCKCLSTATE();
891                 return (EBADF);
892         }
893         NFSUNLOCKCLSTATE();
894
895         /*
896          * If it needs a clientid, do the setclientid now.
897          */
898         if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) {
899                 if (!igotlock)
900                         panic("nfscl_clget");
901                 if (p == NULL || cred == NULL) {
902                         NFSLOCKCLSTATE();
903                         nfsv4_unlock(&clp->nfsc_lock, 0);
904                         NFSUNLOCKCLSTATE();
905                         return (EACCES);
906                 }
907                 /*
908                  * If RFC3530 Sec. 14.2.33 is taken literally,
909                  * NFSERR_CLIDINUSE will be returned persistently for the
910                  * case where a new mount of the same file system is using
911                  * a different principal. In practice, NFSERR_CLIDINUSE is
912                  * only returned when there is outstanding unexpired state
913                  * on the clientid. As such, try for twice the lease
914                  * interval, if we know what that is. Otherwise, make a
915                  * wild ass guess.
916                  * The case of returning NFSERR_STALECLIENTID is far less
917                  * likely, but might occur if there is a significant delay
918                  * between doing the SetClientID and SetClientIDConfirm Ops,
919                  * such that the server throws away the clientid before
920                  * receiving the SetClientIDConfirm.
921                  */
922                 if (clp->nfsc_renew > 0)
923                         clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2;
924                 else
925                         clidinusedelay = 120;
926                 trystalecnt = 3;
927                 do {
928                         error = nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
929                         if (error == NFSERR_STALECLIENTID ||
930                             error == NFSERR_STALEDONTRECOVER ||
931                             error == NFSERR_BADSESSION ||
932                             error == NFSERR_CLIDINUSE) {
933                                 (void) nfs_catnap(PZERO, error, "nfs_setcl");
934                         }
935                 } while (((error == NFSERR_STALECLIENTID ||
936                      error == NFSERR_BADSESSION ||
937                      error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) ||
938                     (error == NFSERR_CLIDINUSE && --clidinusedelay > 0));
939                 if (error) {
940                         NFSLOCKCLSTATE();
941                         nfsv4_unlock(&clp->nfsc_lock, 0);
942                         NFSUNLOCKCLSTATE();
943                         return (error);
944                 }
945                 clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
946         }
947         if (igotlock) {
948                 NFSLOCKCLSTATE();
949                 nfsv4_unlock(&clp->nfsc_lock, 1);
950                 NFSUNLOCKCLSTATE();
951         }
952
953         *clpp = clp;
954         return (0);
955 }
956
957 /*
958  * Get a reference to a clientid and return it, if valid.
959  */
960 struct nfsclclient *
961 nfscl_findcl(struct nfsmount *nmp)
962 {
963         struct nfsclclient *clp;
964
965         clp = nmp->nm_clp;
966         if (clp == NULL || !(clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID))
967                 return (NULL);
968         return (clp);
969 }
970
971 /*
972  * Release the clientid structure. It may be locked or reference counted.
973  */
974 static void
975 nfscl_clrelease(struct nfsclclient *clp)
976 {
977
978         if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
979                 nfsv4_unlock(&clp->nfsc_lock, 0);
980         else
981                 nfsv4_relref(&clp->nfsc_lock);
982 }
983
/*
 * Externally callable version of nfscl_clrelease(): takes the client
 * state mutex around the release of the clientid lock/reference.
 */
void
nfscl_clientrelease(struct nfsclclient *clp)
{

	NFSLOCKCLSTATE();
	nfscl_clrelease(clp);
	NFSUNLOCKCLSTATE();
}
998
999 /*
1000  * Called when wanting to lock a byte region.
1001  */
1002 int
1003 nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
1004     short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp,
1005     int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp,
1006     struct nfscllockowner **lpp, int *newonep, int *donelocallyp)
1007 {
1008         struct nfscllockowner *lp;
1009         struct nfsclopen *op;
1010         struct nfsclclient *clp;
1011         struct nfscllockowner *nlp;
1012         struct nfscllock *nlop, *otherlop;
1013         struct nfscldeleg *dp = NULL, *ldp = NULL;
1014         struct nfscllockownerhead *lhp = NULL;
1015         struct nfsnode *np;
1016         u_int8_t own[NFSV4CL_LOCKNAMELEN], *ownp, openown[NFSV4CL_LOCKNAMELEN];
1017         u_int8_t *openownp;
1018         int error = 0, ret, donelocally = 0;
1019         u_int32_t mode;
1020
1021         /* For Lock Ops, the open mode doesn't matter, so use 0 to match any. */
1022         mode = 0;
1023         np = VTONFS(vp);
1024         *lpp = NULL;
1025         lp = NULL;
1026         *newonep = 0;
1027         *donelocallyp = 0;
1028
1029         /*
1030          * Might need these, so MALLOC them now, to
1031          * avoid a tsleep() in MALLOC later.
1032          */
1033         nlp = malloc(
1034             sizeof (struct nfscllockowner), M_NFSCLLOCKOWNER, M_WAITOK);
1035         otherlop = malloc(
1036             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1037         nlop = malloc(
1038             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1039         nlop->nfslo_type = type;
1040         nlop->nfslo_first = off;
1041         if (len == NFS64BITSSET) {
1042                 nlop->nfslo_end = NFS64BITSSET;
1043         } else {
1044                 nlop->nfslo_end = off + len;
1045                 if (nlop->nfslo_end <= nlop->nfslo_first)
1046                         error = NFSERR_INVAL;
1047         }
1048
1049         if (!error) {
1050                 if (recovery)
1051                         clp = rclp;
1052                 else
1053                         error = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
1054         }
1055         if (error) {
1056                 free(nlp, M_NFSCLLOCKOWNER);
1057                 free(otherlop, M_NFSCLLOCK);
1058                 free(nlop, M_NFSCLLOCK);
1059                 return (error);
1060         }
1061
1062         op = NULL;
1063         if (recovery) {
1064                 ownp = rownp;
1065                 openownp = ropenownp;
1066         } else {
1067                 nfscl_filllockowner(id, own, flags);
1068                 ownp = own;
1069                 if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
1070                         nfscl_filllockowner(NULL, openown, F_POSIX);
1071                 else
1072                         nfscl_filllockowner(p->td_proc, openown, F_POSIX);
1073                 openownp = openown;
1074         }
1075         if (!recovery) {
1076                 NFSLOCKCLSTATE();
1077                 /*
1078                  * First, search for a delegation. If one exists for this file,
1079                  * the lock can be done locally against it, so long as there
1080                  * isn't a local lock conflict.
1081                  */
1082                 ldp = dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1083                     np->n_fhp->nfh_len);
1084                 /* Just sanity check for correct type of delegation */
1085                 if (dp != NULL && ((dp->nfsdl_flags &
1086                     (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) != 0 ||
1087                      (type == F_WRLCK &&
1088                       (dp->nfsdl_flags & NFSCLDL_WRITE) == 0)))
1089                         dp = NULL;
1090         }
1091         if (dp != NULL) {
1092                 /* Now, find an open and maybe a lockowner. */
1093                 ret = nfscl_getopen(&dp->nfsdl_owner, np->n_fhp->nfh_fh,
1094                     np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op);
1095                 if (ret)
1096                         ret = nfscl_getopen(&clp->nfsc_owner,
1097                             np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1098                             ownp, mode, NULL, &op);
1099                 if (!ret) {
1100                         lhp = &dp->nfsdl_lock;
1101                         TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
1102                         TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
1103                         dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
1104                         donelocally = 1;
1105                 } else {
1106                         dp = NULL;
1107                 }
1108         }
1109         if (!donelocally) {
1110                 /*
1111                  * Get the related Open and maybe lockowner.
1112                  */
1113                 error = nfscl_getopen(&clp->nfsc_owner,
1114                     np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1115                     ownp, mode, &lp, &op);
1116                 if (!error)
1117                         lhp = &op->nfso_lock;
1118         }
1119         if (!error && !recovery)
1120                 error = nfscl_localconflict(clp, np->n_fhp->nfh_fh,
1121                     np->n_fhp->nfh_len, nlop, ownp, ldp, NULL);
1122         if (error) {
1123                 if (!recovery) {
1124                         nfscl_clrelease(clp);
1125                         NFSUNLOCKCLSTATE();
1126                 }
1127                 free(nlp, M_NFSCLLOCKOWNER);
1128                 free(otherlop, M_NFSCLLOCK);
1129                 free(nlop, M_NFSCLLOCK);
1130                 return (error);
1131         }
1132
1133         /*
1134          * Ok, see if a lockowner exists and create one, as required.
1135          */
1136         if (lp == NULL)
1137                 LIST_FOREACH(lp, lhp, nfsl_list) {
1138                         if (!NFSBCMP(lp->nfsl_owner, ownp, NFSV4CL_LOCKNAMELEN))
1139                                 break;
1140                 }
1141         if (lp == NULL) {
1142                 NFSBCOPY(ownp, nlp->nfsl_owner, NFSV4CL_LOCKNAMELEN);
1143                 if (recovery)
1144                         NFSBCOPY(ropenownp, nlp->nfsl_openowner,
1145                             NFSV4CL_LOCKNAMELEN);
1146                 else
1147                         NFSBCOPY(op->nfso_own->nfsow_owner, nlp->nfsl_openowner,
1148                             NFSV4CL_LOCKNAMELEN);
1149                 nlp->nfsl_seqid = 0;
1150                 nlp->nfsl_lockflags = flags;
1151                 nlp->nfsl_inprog = NULL;
1152                 nfscl_lockinit(&nlp->nfsl_rwlock);
1153                 LIST_INIT(&nlp->nfsl_lock);
1154                 if (donelocally) {
1155                         nlp->nfsl_open = NULL;
1156                         nfsstatsv1.cllocallockowners++;
1157                 } else {
1158                         nlp->nfsl_open = op;
1159                         nfsstatsv1.cllockowners++;
1160                 }
1161                 LIST_INSERT_HEAD(lhp, nlp, nfsl_list);
1162                 lp = nlp;
1163                 nlp = NULL;
1164                 *newonep = 1;
1165         }
1166
1167         /*
1168          * Now, update the byte ranges for locks.
1169          */
1170         ret = nfscl_updatelock(lp, &nlop, &otherlop, donelocally);
1171         if (!ret)
1172                 donelocally = 1;
1173         if (donelocally) {
1174                 *donelocallyp = 1;
1175                 if (!recovery)
1176                         nfscl_clrelease(clp);
1177         } else {
1178                 /*
1179                  * Serial modifications on the lock owner for multiple threads
1180                  * for the same process using a read/write lock.
1181                  */
1182                 if (!recovery)
1183                         nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1184         }
1185         if (!recovery)
1186                 NFSUNLOCKCLSTATE();
1187
1188         if (nlp)
1189                 free(nlp, M_NFSCLLOCKOWNER);
1190         if (nlop)
1191                 free(nlop, M_NFSCLLOCK);
1192         if (otherlop)
1193                 free(otherlop, M_NFSCLLOCK);
1194
1195         *lpp = lp;
1196         return (0);
1197 }
1198
1199 /*
1200  * Called to unlock a byte range, for LockU.
1201  */
1202 int
1203 nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
1204     __unused struct ucred *cred, NFSPROC_T *p, int callcnt,
1205     struct nfsclclient *clp, void *id, int flags,
1206     struct nfscllockowner **lpp, int *dorpcp)
1207 {
1208         struct nfscllockowner *lp;
1209         struct nfsclowner *owp;
1210         struct nfsclopen *op;
1211         struct nfscllock *nlop, *other_lop = NULL;
1212         struct nfscldeleg *dp;
1213         struct nfsnode *np;
1214         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1215         int ret = 0, fnd;
1216
1217         np = VTONFS(vp);
1218         *lpp = NULL;
1219         *dorpcp = 0;
1220
1221         /*
1222          * Might need these, so MALLOC them now, to
1223          * avoid a tsleep() in MALLOC later.
1224          */
1225         nlop = malloc(
1226             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1227         nlop->nfslo_type = F_UNLCK;
1228         nlop->nfslo_first = off;
1229         if (len == NFS64BITSSET) {
1230                 nlop->nfslo_end = NFS64BITSSET;
1231         } else {
1232                 nlop->nfslo_end = off + len;
1233                 if (nlop->nfslo_end <= nlop->nfslo_first) {
1234                         free(nlop, M_NFSCLLOCK);
1235                         return (NFSERR_INVAL);
1236                 }
1237         }
1238         if (callcnt == 0) {
1239                 other_lop = malloc(
1240                     sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1241                 *other_lop = *nlop;
1242         }
1243         nfscl_filllockowner(id, own, flags);
1244         dp = NULL;
1245         NFSLOCKCLSTATE();
1246         if (callcnt == 0)
1247                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1248                     np->n_fhp->nfh_len);
1249
1250         /*
1251          * First, unlock any local regions on a delegation.
1252          */
1253         if (dp != NULL) {
1254                 /* Look for this lockowner. */
1255                 LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1256                         if (!NFSBCMP(lp->nfsl_owner, own,
1257                             NFSV4CL_LOCKNAMELEN))
1258                                 break;
1259                 }
1260                 if (lp != NULL)
1261                         /* Use other_lop, so nlop is still available */
1262                         (void)nfscl_updatelock(lp, &other_lop, NULL, 1);
1263         }
1264
1265         /*
1266          * Now, find a matching open/lockowner that hasn't already been done,
1267          * as marked by nfsl_inprog.
1268          */
1269         lp = NULL;
1270         fnd = 0;
1271         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1272             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1273                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1274                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1275                     LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1276                         if (lp->nfsl_inprog == NULL &&
1277                             !NFSBCMP(lp->nfsl_owner, own,
1278                              NFSV4CL_LOCKNAMELEN)) {
1279                                 fnd = 1;
1280                                 break;
1281                         }
1282                     }
1283                     if (fnd)
1284                         break;
1285                 }
1286             }
1287             if (fnd)
1288                 break;
1289         }
1290
1291         if (lp != NULL) {
1292                 ret = nfscl_updatelock(lp, &nlop, NULL, 0);
1293                 if (ret)
1294                         *dorpcp = 1;
1295                 /*
1296                  * Serial modifications on the lock owner for multiple
1297                  * threads for the same process using a read/write lock.
1298                  */
1299                 lp->nfsl_inprog = p;
1300                 nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1301                 *lpp = lp;
1302         }
1303         NFSUNLOCKCLSTATE();
1304         if (nlop)
1305                 free(nlop, M_NFSCLLOCK);
1306         if (other_lop)
1307                 free(other_lop, M_NFSCLLOCK);
1308         return (0);
1309 }
1310
1311 /*
1312  * Release all lockowners marked in progess for this process and file.
1313  */
1314 void
1315 nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p,
1316     void *id, int flags)
1317 {
1318         struct nfsclowner *owp;
1319         struct nfsclopen *op;
1320         struct nfscllockowner *lp;
1321         struct nfsnode *np;
1322         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1323
1324         np = VTONFS(vp);
1325         nfscl_filllockowner(id, own, flags);
1326         NFSLOCKCLSTATE();
1327         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1328             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1329                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1330                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1331                     LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1332                         if (lp->nfsl_inprog == p &&
1333                             !NFSBCMP(lp->nfsl_owner, own,
1334                             NFSV4CL_LOCKNAMELEN)) {
1335                             lp->nfsl_inprog = NULL;
1336                             nfscl_lockunlock(&lp->nfsl_rwlock);
1337                         }
1338                     }
1339                 }
1340             }
1341         }
1342         nfscl_clrelease(clp);
1343         NFSUNLOCKCLSTATE();
1344 }
1345
1346 /*
1347  * Called to find out if any bytes within the byte range specified are
1348  * write locked by the calling process. Used to determine if flushing
1349  * is required before a LockU.
1350  * If in doubt, return 1, so the flush will occur.
1351  */
1352 int
1353 nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
1354     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
1355 {
1356         struct nfsclowner *owp;
1357         struct nfscllockowner *lp;
1358         struct nfsclopen *op;
1359         struct nfsclclient *clp;
1360         struct nfscllock *lop;
1361         struct nfscldeleg *dp;
1362         struct nfsnode *np;
1363         u_int64_t off, end;
1364         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1365         int error = 0;
1366
1367         np = VTONFS(vp);
1368         switch (fl->l_whence) {
1369         case SEEK_SET:
1370         case SEEK_CUR:
1371                 /*
1372                  * Caller is responsible for adding any necessary offset
1373                  * when SEEK_CUR is used.
1374                  */
1375                 off = fl->l_start;
1376                 break;
1377         case SEEK_END:
1378                 off = np->n_size + fl->l_start;
1379                 break;
1380         default:
1381                 return (1);
1382         }
1383         if (fl->l_len != 0) {
1384                 end = off + fl->l_len;
1385                 if (end < off)
1386                         return (1);
1387         } else {
1388                 end = NFS64BITSSET;
1389         }
1390
1391         error = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
1392         if (error)
1393                 return (1);
1394         nfscl_filllockowner(id, own, flags);
1395         NFSLOCKCLSTATE();
1396
1397         /*
1398          * First check the delegation locks.
1399          */
1400         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
1401         if (dp != NULL) {
1402                 LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1403                         if (!NFSBCMP(lp->nfsl_owner, own,
1404                             NFSV4CL_LOCKNAMELEN))
1405                                 break;
1406                 }
1407                 if (lp != NULL) {
1408                         LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1409                                 if (lop->nfslo_first >= end)
1410                                         break;
1411                                 if (lop->nfslo_end <= off)
1412                                         continue;
1413                                 if (lop->nfslo_type == F_WRLCK) {
1414                                         nfscl_clrelease(clp);
1415                                         NFSUNLOCKCLSTATE();
1416                                         return (1);
1417                                 }
1418                         }
1419                 }
1420         }
1421
1422         /*
1423          * Now, check state against the server.
1424          */
1425         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1426             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1427                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1428                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1429                     LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1430                         if (!NFSBCMP(lp->nfsl_owner, own,
1431                             NFSV4CL_LOCKNAMELEN))
1432                             break;
1433                     }
1434                     if (lp != NULL) {
1435                         LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1436                             if (lop->nfslo_first >= end)
1437                                 break;
1438                             if (lop->nfslo_end <= off)
1439                                 continue;
1440                             if (lop->nfslo_type == F_WRLCK) {
1441                                 nfscl_clrelease(clp);
1442                                 NFSUNLOCKCLSTATE();
1443                                 return (1);
1444                             }
1445                         }
1446                     }
1447                 }
1448             }
1449         }
1450         nfscl_clrelease(clp);
1451         NFSUNLOCKCLSTATE();
1452         return (0);
1453 }
1454
1455 /*
1456  * Release a byte range lock owner structure.
1457  */
1458 void
1459 nfscl_lockrelease(struct nfscllockowner *lp, int error, int candelete)
1460 {
1461         struct nfsclclient *clp;
1462
1463         if (lp == NULL)
1464                 return;
1465         NFSLOCKCLSTATE();
1466         clp = lp->nfsl_open->nfso_own->nfsow_clp;
1467         if (error != 0 && candelete &&
1468             (lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED) == 0)
1469                 nfscl_freelockowner(lp, 0);
1470         else
1471                 nfscl_lockunlock(&lp->nfsl_rwlock);
1472         nfscl_clrelease(clp);
1473         NFSUNLOCKCLSTATE();
1474 }
1475
1476 /*
1477  * Free up an open structure and any associated byte range lock structures.
1478  */
1479 void
1480 nfscl_freeopen(struct nfsclopen *op, int local)
1481 {
1482
1483         LIST_REMOVE(op, nfso_list);
1484         if (op->nfso_hash.le_prev != NULL)
1485                 LIST_REMOVE(op, nfso_hash);
1486         nfscl_freealllocks(&op->nfso_lock, local);
1487         free(op, M_NFSCLOPEN);
1488         if (local)
1489                 nfsstatsv1.cllocalopens--;
1490         else
1491                 nfsstatsv1.clopens--;
1492 }
1493
1494 /*
1495  * Free up all lock owners and associated locks.
1496  */
1497 static void
1498 nfscl_freealllocks(struct nfscllockownerhead *lhp, int local)
1499 {
1500         struct nfscllockowner *lp, *nlp;
1501
1502         LIST_FOREACH_SAFE(lp, lhp, nfsl_list, nlp) {
1503                 if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1504                         panic("nfscllckw");
1505                 nfscl_freelockowner(lp, local);
1506         }
1507 }
1508
1509 /*
1510  * Called for an Open when NFSERR_EXPIRED is received from the server.
1511  * If there are no byte range locks nor a Share Deny lost, try to do a
1512  * fresh Open. Otherwise, free the open.
1513  */
1514 static int
1515 nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op,
1516     struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
1517 {
1518         struct nfscllockowner *lp;
1519         struct nfscldeleg *dp;
1520         int mustdelete = 0, error;
1521
1522         /*
1523          * Look for any byte range lock(s).
1524          */
1525         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1526                 if (!LIST_EMPTY(&lp->nfsl_lock)) {
1527                         mustdelete = 1;
1528                         break;
1529                 }
1530         }
1531
1532         /*
1533          * If no byte range lock(s) nor a Share deny, try to re-open.
1534          */
1535         if (!mustdelete && (op->nfso_mode & NFSLCK_DENYBITS) == 0) {
1536                 newnfs_copycred(&op->nfso_cred, cred);
1537                 dp = NULL;
1538                 error = nfsrpc_reopen(nmp, op->nfso_fh,
1539                     op->nfso_fhlen, op->nfso_mode, op, &dp, cred, p);
1540                 if (error) {
1541                         mustdelete = 1;
1542                         if (dp != NULL) {
1543                                 free(dp, M_NFSCLDELEG);
1544                                 dp = NULL;
1545                         }
1546                 }
1547                 if (dp != NULL)
1548                         nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh,
1549                             op->nfso_fhlen, cred, p, &dp);
1550         }
1551
1552         /*
1553          * If a byte range lock or Share deny or couldn't re-open, free it.
1554          */
1555         if (mustdelete)
1556                 nfscl_freeopen(op, 0);
1557         return (mustdelete);
1558 }
1559
1560 /*
1561  * Free up an open owner structure.
1562  */
1563 static void
1564 nfscl_freeopenowner(struct nfsclowner *owp, int local)
1565 {
1566
1567         LIST_REMOVE(owp, nfsow_list);
1568         free(owp, M_NFSCLOWNER);
1569         if (local)
1570                 nfsstatsv1.cllocalopenowners--;
1571         else
1572                 nfsstatsv1.clopenowners--;
1573 }
1574
1575 /*
1576  * Free up a byte range lock owner structure.
1577  */
1578 void
1579 nfscl_freelockowner(struct nfscllockowner *lp, int local)
1580 {
1581         struct nfscllock *lop, *nlop;
1582
1583         LIST_REMOVE(lp, nfsl_list);
1584         LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
1585                 nfscl_freelock(lop, local);
1586         }
1587         free(lp, M_NFSCLLOCKOWNER);
1588         if (local)
1589                 nfsstatsv1.cllocallockowners--;
1590         else
1591                 nfsstatsv1.cllockowners--;
1592 }
1593
1594 /*
1595  * Free up a byte range lock structure.
1596  */
1597 void
1598 nfscl_freelock(struct nfscllock *lop, int local)
1599 {
1600
1601         LIST_REMOVE(lop, nfslo_list);
1602         free(lop, M_NFSCLLOCK);
1603         if (local)
1604                 nfsstatsv1.cllocallocks--;
1605         else
1606                 nfsstatsv1.cllocks--;
1607 }
1608
1609 /*
1610  * Clean out the state related to a delegation.
1611  */
1612 static void
1613 nfscl_cleandeleg(struct nfscldeleg *dp)
1614 {
1615         struct nfsclowner *owp, *nowp;
1616         struct nfsclopen *op;
1617
1618         LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
1619                 op = LIST_FIRST(&owp->nfsow_open);
1620                 if (op != NULL) {
1621                         if (LIST_NEXT(op, nfso_list) != NULL)
1622                                 panic("nfscleandel");
1623                         nfscl_freeopen(op, 1);
1624                 }
1625                 nfscl_freeopenowner(owp, 1);
1626         }
1627         nfscl_freealllocks(&dp->nfsdl_lock, 1);
1628 }
1629
1630 /*
1631  * Free a delegation.
1632  */
1633 static void
1634 nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp, bool freeit)
1635 {
1636
1637         TAILQ_REMOVE(hdp, dp, nfsdl_list);
1638         LIST_REMOVE(dp, nfsdl_hash);
1639         if (freeit)
1640                 free(dp, M_NFSCLDELEG);
1641         nfsstatsv1.cldelegates--;
1642         nfscl_delegcnt--;
1643 }
1644
1645 /*
1646  * Free up all state related to this client structure.
1647  */
1648 static void
1649 nfscl_cleanclient(struct nfsclclient *clp)
1650 {
1651         struct nfsclowner *owp, *nowp;
1652         struct nfsclopen *op, *nop;
1653         struct nfscllayout *lyp, *nlyp;
1654         struct nfscldevinfo *dip, *ndip;
1655
1656         TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
1657                 nfscl_freelayout(lyp);
1658
1659         LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip)
1660                 nfscl_freedevinfo(dip);
1661
1662         /* Now, all the OpenOwners, etc. */
1663         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1664                 LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1665                         nfscl_freeopen(op, 0);
1666                 }
1667                 nfscl_freeopenowner(owp, 0);
1668         }
1669 }
1670
1671 /*
1672  * Called when an NFSERR_EXPIRED is received from the server.
1673  */
1674 static void
1675 nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp,
1676     struct ucred *cred, NFSPROC_T *p)
1677 {
1678         struct nfsclowner *owp, *nowp, *towp;
1679         struct nfsclopen *op, *nop, *top;
1680         struct nfscldeleg *dp, *ndp;
1681         int ret, printed = 0;
1682
1683         /*
1684          * First, merge locally issued Opens into the list for the server.
1685          */
1686         dp = TAILQ_FIRST(&clp->nfsc_deleg);
1687         while (dp != NULL) {
1688             ndp = TAILQ_NEXT(dp, nfsdl_list);
1689             owp = LIST_FIRST(&dp->nfsdl_owner);
1690             while (owp != NULL) {
1691                 nowp = LIST_NEXT(owp, nfsow_list);
1692                 op = LIST_FIRST(&owp->nfsow_open);
1693                 if (op != NULL) {
1694                     if (LIST_NEXT(op, nfso_list) != NULL)
1695                         panic("nfsclexp");
1696                     LIST_FOREACH(towp, &clp->nfsc_owner, nfsow_list) {
1697                         if (!NFSBCMP(towp->nfsow_owner, owp->nfsow_owner,
1698                             NFSV4CL_LOCKNAMELEN))
1699                             break;
1700                     }
1701                     if (towp != NULL) {
1702                         /* Merge opens in */
1703                         LIST_FOREACH(top, &towp->nfsow_open, nfso_list) {
1704                             if (top->nfso_fhlen == op->nfso_fhlen &&
1705                                 !NFSBCMP(top->nfso_fh, op->nfso_fh,
1706                                  op->nfso_fhlen)) {
1707                                 top->nfso_mode |= op->nfso_mode;
1708                                 top->nfso_opencnt += op->nfso_opencnt;
1709                                 break;
1710                             }
1711                         }
1712                         if (top == NULL) {
1713                             /* Just add the open to the owner list */
1714                             LIST_REMOVE(op, nfso_list);
1715                             op->nfso_own = towp;
1716                             LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list);
1717                             LIST_INSERT_HEAD(NFSCLOPENHASH(clp, op->nfso_fh,
1718                                 op->nfso_fhlen), op, nfso_hash);
1719                             nfsstatsv1.cllocalopens--;
1720                             nfsstatsv1.clopens++;
1721                         }
1722                     } else {
1723                         /* Just add the openowner to the client list */
1724                         LIST_REMOVE(owp, nfsow_list);
1725                         owp->nfsow_clp = clp;
1726                         LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list);
1727                         LIST_INSERT_HEAD(NFSCLOPENHASH(clp, op->nfso_fh,
1728                             op->nfso_fhlen), op, nfso_hash);
1729                         nfsstatsv1.cllocalopenowners--;
1730                         nfsstatsv1.clopenowners++;
1731                         nfsstatsv1.cllocalopens--;
1732                         nfsstatsv1.clopens++;
1733                     }
1734                 }
1735                 owp = nowp;
1736             }
1737             if (!printed && !LIST_EMPTY(&dp->nfsdl_lock)) {
1738                 printed = 1;
1739                 printf("nfsv4 expired locks lost\n");
1740             }
1741             nfscl_cleandeleg(dp);
1742             nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
1743             dp = ndp;
1744         }
1745         if (!TAILQ_EMPTY(&clp->nfsc_deleg))
1746             panic("nfsclexp");
1747
1748         /*
1749          * Now, try and reopen against the server.
1750          */
1751         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1752                 owp->nfsow_seqid = 0;
1753                 LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1754                         ret = nfscl_expireopen(clp, op, nmp, cred, p);
1755                         if (ret && !printed) {
1756                                 printed = 1;
1757                                 printf("nfsv4 expired locks lost\n");
1758                         }
1759                 }
1760                 if (LIST_EMPTY(&owp->nfsow_open))
1761                         nfscl_freeopenowner(owp, 0);
1762         }
1763 }
1764
1765 /*
1766  * This function must be called after the process represented by "own" has
1767  * exited. Must be called with CLSTATE lock held.
1768  */
1769 static void
1770 nfscl_cleanup_common(struct nfsclclient *clp, u_int8_t *own)
1771 {
1772         struct nfsclowner *owp, *nowp;
1773         struct nfscllockowner *lp, *nlp;
1774         struct nfscldeleg *dp;
1775
1776         /* First, get rid of local locks on delegations. */
1777         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1778                 LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1779                     if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
1780                         if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1781                             panic("nfscllckw");
1782                         nfscl_freelockowner(lp, 1);
1783                     }
1784                 }
1785         }
1786         owp = LIST_FIRST(&clp->nfsc_owner);
1787         while (owp != NULL) {
1788                 nowp = LIST_NEXT(owp, nfsow_list);
1789                 if (!NFSBCMP(owp->nfsow_owner, own,
1790                     NFSV4CL_LOCKNAMELEN)) {
1791                         /*
1792                          * If there are children that haven't closed the
1793                          * file descriptors yet, the opens will still be
1794                          * here. For that case, let the renew thread clear
1795                          * out the OpenOwner later.
1796                          */
1797                         if (LIST_EMPTY(&owp->nfsow_open))
1798                                 nfscl_freeopenowner(owp, 0);
1799                         else
1800                                 owp->nfsow_defunct = 1;
1801                 }
1802                 owp = nowp;
1803         }
1804 }
1805
1806 /*
1807  * Find open/lock owners for processes that have exited.
1808  */
1809 static void
1810 nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp)
1811 {
1812         struct nfsclowner *owp, *nowp;
1813         struct nfsclopen *op;
1814         struct nfscllockowner *lp, *nlp;
1815         struct nfscldeleg *dp;
1816
1817         /*
1818          * All the pidhash locks must be acquired, since they are sx locks
1819          * and must be acquired before the mutexes.  The pid(s) that will
1820          * be used aren't known yet, so all the locks need to be acquired.
1821          * Fortunately, this function is only performed once/sec.
1822          */
1823         pidhash_slockall();
1824         NFSLOCKCLSTATE();
1825         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1826                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1827                         LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) {
1828                                 if (LIST_EMPTY(&lp->nfsl_lock))
1829                                         nfscl_emptylockowner(lp, lhp);
1830                         }
1831                 }
1832                 if (nfscl_procdoesntexist(owp->nfsow_owner))
1833                         nfscl_cleanup_common(clp, owp->nfsow_owner);
1834         }
1835
1836         /*
1837          * For the single open_owner case, these lock owners need to be
1838          * checked to see if they still exist separately.
1839          * This is because nfscl_procdoesntexist() never returns true for
1840          * the single open_owner so that the above doesn't ever call
1841          * nfscl_cleanup_common().
1842          */
1843         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1844                 LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1845                         if (nfscl_procdoesntexist(lp->nfsl_owner))
1846                                 nfscl_cleanup_common(clp, lp->nfsl_owner);
1847                 }
1848         }
1849         NFSUNLOCKCLSTATE();
1850         pidhash_sunlockall();
1851 }
1852
1853 /*
1854  * Take the empty lock owner and move it to the local lhp list if the
1855  * associated process no longer exists.
1856  */
1857 static void
1858 nfscl_emptylockowner(struct nfscllockowner *lp,
1859     struct nfscllockownerfhhead *lhp)
1860 {
1861         struct nfscllockownerfh *lfhp, *mylfhp;
1862         struct nfscllockowner *nlp;
1863         int fnd_it;
1864
1865         /* If not a Posix lock owner, just return. */
1866         if ((lp->nfsl_lockflags & F_POSIX) == 0)
1867                 return;
1868
1869         fnd_it = 0;
1870         mylfhp = NULL;
1871         /*
1872          * First, search to see if this lock owner is already in the list.
1873          * If it is, then the associated process no longer exists.
1874          */
1875         SLIST_FOREACH(lfhp, lhp, nfslfh_list) {
1876                 if (lfhp->nfslfh_len == lp->nfsl_open->nfso_fhlen &&
1877                     !NFSBCMP(lfhp->nfslfh_fh, lp->nfsl_open->nfso_fh,
1878                     lfhp->nfslfh_len))
1879                         mylfhp = lfhp;
1880                 LIST_FOREACH(nlp, &lfhp->nfslfh_lock, nfsl_list)
1881                         if (!NFSBCMP(nlp->nfsl_owner, lp->nfsl_owner,
1882                             NFSV4CL_LOCKNAMELEN))
1883                                 fnd_it = 1;
1884         }
1885         /* If not found, check if process still exists. */
1886         if (fnd_it == 0 && nfscl_procdoesntexist(lp->nfsl_owner) == 0)
1887                 return;
1888
1889         /* Move the lock owner over to the local list. */
1890         if (mylfhp == NULL) {
1891                 mylfhp = malloc(sizeof(struct nfscllockownerfh), M_TEMP,
1892                     M_NOWAIT);
1893                 if (mylfhp == NULL)
1894                         return;
1895                 mylfhp->nfslfh_len = lp->nfsl_open->nfso_fhlen;
1896                 NFSBCOPY(lp->nfsl_open->nfso_fh, mylfhp->nfslfh_fh,
1897                     mylfhp->nfslfh_len);
1898                 LIST_INIT(&mylfhp->nfslfh_lock);
1899                 SLIST_INSERT_HEAD(lhp, mylfhp, nfslfh_list);
1900         }
1901         LIST_REMOVE(lp, nfsl_list);
1902         LIST_INSERT_HEAD(&mylfhp->nfslfh_lock, lp, nfsl_list);
1903 }
1904
1905 static int      fake_global;    /* Used to force visibility of MNTK_UNMOUNTF */
1906 /*
1907  * Called from nfs umount to free up the clientid.
1908  */
1909 void
1910 nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p)
1911 {
1912         struct nfsclclient *clp;
1913         struct ucred *cred;
1914         int igotlock;
1915
1916         /*
1917          * For the case that matters, this is the thread that set
1918          * MNTK_UNMOUNTF, so it will see it set. The code that follows is
1919          * done to ensure that any thread executing nfscl_getcl() after
1920          * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the
1921          * mutex for NFSLOCKCLSTATE(), so it is "m" for the following
1922          * explanation, courtesy of Alan Cox.
1923          * What follows is a snippet from Alan Cox's email at:
1924          * https://docs.FreeBSD.org/cgi/mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw
1925          * 
1926          * 1. Set MNTK_UNMOUNTF
1927          * 2. Acquire a standard FreeBSD mutex "m".
1928          * 3. Update some data structures.
1929          * 4. Release mutex "m".
1930          * 
1931          * Then, other threads that acquire "m" after step 4 has occurred will
1932          * see MNTK_UNMOUNTF as set.  But, other threads that beat thread X to
1933          * step 2 may or may not see MNTK_UNMOUNTF as set.
1934          */
1935         NFSLOCKCLSTATE();
1936         if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1937                 fake_global++;
1938                 NFSUNLOCKCLSTATE();
1939                 NFSLOCKCLSTATE();
1940         }
1941
1942         clp = nmp->nm_clp;
1943         if (clp != NULL) {
1944                 if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0)
1945                         panic("nfscl umount");
1946
1947                 /*
1948                  * First, handshake with the nfscl renew thread, to terminate
1949                  * it.
1950                  */
1951                 clp->nfsc_flags |= NFSCLFLAGS_UMOUNT;
1952                 while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD)
1953                         (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT,
1954                             "nfsclumnt", hz);
1955
1956                 /*
1957                  * Now, get the exclusive lock on the client state, so
1958                  * that no uses of the state are still in progress.
1959                  */
1960                 do {
1961                         igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
1962                             NFSCLSTATEMUTEXPTR, NULL);
1963                 } while (!igotlock);
1964                 NFSUNLOCKCLSTATE();
1965
1966                 /*
1967                  * Free up all the state. It will expire on the server, but
1968                  * maybe we should do a SetClientId/SetClientIdConfirm so
1969                  * the server throws it away?
1970                  */
1971                 LIST_REMOVE(clp, nfsc_list);
1972                 nfscl_delegreturnall(clp, p);
1973                 cred = newnfs_getcred();
1974                 if (NFSHASNFSV4N(nmp)) {
1975                         (void)nfsrpc_destroysession(nmp, clp, cred, p);
1976                         (void)nfsrpc_destroyclient(nmp, clp, cred, p);
1977                 } else
1978                         (void)nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
1979                 nfscl_cleanclient(clp);
1980                 nmp->nm_clp = NULL;
1981                 NFSFREECRED(cred);
1982                 free(clp, M_NFSCLCLIENT);
1983         } else
1984                 NFSUNLOCKCLSTATE();
1985 }
1986
1987 /*
1988  * This function is called when a server replies with NFSERR_STALECLIENTID
1989  * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists,
1990  * doing Opens and Locks with reclaim. If these fail, it deletes the
1991  * corresponding state.
1992  */
1993 static void
1994 nfscl_recover(struct nfsclclient *clp, bool *retokp, struct ucred *cred,
1995     NFSPROC_T *p)
1996 {
1997         struct nfsclowner *owp, *nowp;
1998         struct nfsclopen *op, *nop;
1999         struct nfscllockowner *lp, *nlp;
2000         struct nfscllock *lop, *nlop;
2001         struct nfscldeleg *dp, *ndp, *tdp;
2002         struct nfsmount *nmp;
2003         struct ucred *tcred;
2004         struct nfsclopenhead extra_open;
2005         struct nfscldeleghead extra_deleg;
2006         struct nfsreq *rep;
2007         u_int64_t len;
2008         u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode;
2009         int i, igotlock = 0, error, trycnt, firstlock;
2010         struct nfscllayout *lyp, *nlyp;
2011         bool recovered_one;
2012
2013         /*
2014          * First, lock the client structure, so everyone else will
2015          * block when trying to use state.
2016          */
2017         NFSLOCKCLSTATE();
2018         clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2019         do {
2020                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2021                     NFSCLSTATEMUTEXPTR, NULL);
2022         } while (!igotlock);
2023         NFSUNLOCKCLSTATE();
2024
2025         nmp = clp->nfsc_nmp;
2026         if (nmp == NULL)
2027                 panic("nfscl recover");
2028
2029         /*
2030          * For now, just get rid of all layouts. There may be a need
2031          * to do LayoutCommit Ops with reclaim == true later.
2032          */
2033         TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
2034                 nfscl_freelayout(lyp);
2035         TAILQ_INIT(&clp->nfsc_layout);
2036         for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
2037                 LIST_INIT(&clp->nfsc_layouthash[i]);
2038
2039         trycnt = 5;
2040         tcred = NULL;
2041         do {
2042                 error = nfsrpc_setclient(nmp, clp, 1, retokp, cred, p);
2043         } while ((error == NFSERR_STALECLIENTID ||
2044              error == NFSERR_BADSESSION ||
2045              error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2046         if (error) {
2047                 NFSLOCKCLSTATE();
2048                 clp->nfsc_flags &= ~(NFSCLFLAGS_RECOVER |
2049                     NFSCLFLAGS_RECVRINPROG);
2050                 wakeup(&clp->nfsc_flags);
2051                 nfsv4_unlock(&clp->nfsc_lock, 0);
2052                 NFSUNLOCKCLSTATE();
2053                 return;
2054         }
2055         clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2056         clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2057
2058         /*
2059          * Mark requests already queued on the server, so that they don't
2060          * initiate another recovery cycle. Any requests already in the
2061          * queue that handle state information will have the old stale
2062          * clientid/stateid and will get a NFSERR_STALESTATEID,
2063          * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server.
2064          * This will be translated to NFSERR_STALEDONTRECOVER when
2065          * R_DONTRECOVER is set.
2066          */
2067         NFSLOCKREQ();
2068         TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) {
2069                 if (rep->r_nmp == nmp)
2070                         rep->r_flags |= R_DONTRECOVER;
2071         }
2072         NFSUNLOCKREQ();
2073
2074         /*
2075          * If nfsrpc_setclient() returns *retokp == true,
2076          * no more recovery is needed.
2077          */
2078         if (*retokp)
2079                 goto out;
2080
2081         /*
2082          * Now, mark all delegations "need reclaim".
2083          */
2084         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list)
2085                 dp->nfsdl_flags |= NFSCLDL_NEEDRECLAIM;
2086
2087         TAILQ_INIT(&extra_deleg);
2088         LIST_INIT(&extra_open);
2089         /*
2090          * Now traverse the state lists, doing Open and Lock Reclaims.
2091          */
2092         tcred = newnfs_getcred();
2093         recovered_one = false;
2094         owp = LIST_FIRST(&clp->nfsc_owner);
2095         while (owp != NULL) {
2096             nowp = LIST_NEXT(owp, nfsow_list);
2097             owp->nfsow_seqid = 0;
2098             op = LIST_FIRST(&owp->nfsow_open);
2099             while (op != NULL) {
2100                 nop = LIST_NEXT(op, nfso_list);
2101                 if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2102                     /* Search for a delegation to reclaim with the open */
2103                     TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2104                         if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2105                             continue;
2106                         if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2107                             mode = NFSV4OPEN_ACCESSWRITE;
2108                             delegtype = NFSV4OPEN_DELEGATEWRITE;
2109                         } else {
2110                             mode = NFSV4OPEN_ACCESSREAD;
2111                             delegtype = NFSV4OPEN_DELEGATEREAD;
2112                         }
2113                         if ((op->nfso_mode & mode) == mode &&
2114                             op->nfso_fhlen == dp->nfsdl_fhlen &&
2115                             !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen))
2116                             break;
2117                     }
2118                     ndp = dp;
2119                     if (dp == NULL)
2120                         delegtype = NFSV4OPEN_DELEGATENONE;
2121                     newnfs_copycred(&op->nfso_cred, tcred);
2122                     error = nfscl_tryopen(nmp, NULL, op->nfso_fh,
2123                         op->nfso_fhlen, op->nfso_fh, op->nfso_fhlen,
2124                         op->nfso_mode, op, NULL, 0, &ndp, 1, delegtype,
2125                         tcred, p);
2126                     if (!error) {
2127                         recovered_one = true;
2128                         /* Handle any replied delegation */
2129                         if (ndp != NULL && ((ndp->nfsdl_flags & NFSCLDL_WRITE)
2130                             || NFSMNT_RDONLY(nmp->nm_mountp))) {
2131                             if ((ndp->nfsdl_flags & NFSCLDL_WRITE))
2132                                 mode = NFSV4OPEN_ACCESSWRITE;
2133                             else
2134                                 mode = NFSV4OPEN_ACCESSREAD;
2135                             TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2136                                 if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2137                                     continue;
2138                                 if ((op->nfso_mode & mode) == mode &&
2139                                     op->nfso_fhlen == dp->nfsdl_fhlen &&
2140                                     !NFSBCMP(op->nfso_fh, dp->nfsdl_fh,
2141                                     op->nfso_fhlen)) {
2142                                     dp->nfsdl_stateid = ndp->nfsdl_stateid;
2143                                     dp->nfsdl_sizelimit = ndp->nfsdl_sizelimit;
2144                                     dp->nfsdl_ace = ndp->nfsdl_ace;
2145                                     dp->nfsdl_change = ndp->nfsdl_change;
2146                                     dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2147                                     if ((ndp->nfsdl_flags & NFSCLDL_RECALL))
2148                                         dp->nfsdl_flags |= NFSCLDL_RECALL;
2149                                     free(ndp, M_NFSCLDELEG);
2150                                     ndp = NULL;
2151                                     break;
2152                                 }
2153                             }
2154                         }
2155                         if (ndp != NULL)
2156                             TAILQ_INSERT_HEAD(&extra_deleg, ndp, nfsdl_list);
2157
2158                         /* and reclaim all byte range locks */
2159                         lp = LIST_FIRST(&op->nfso_lock);
2160                         while (lp != NULL) {
2161                             nlp = LIST_NEXT(lp, nfsl_list);
2162                             lp->nfsl_seqid = 0;
2163                             firstlock = 1;
2164                             lop = LIST_FIRST(&lp->nfsl_lock);
2165                             while (lop != NULL) {
2166                                 nlop = LIST_NEXT(lop, nfslo_list);
2167                                 if (lop->nfslo_end == NFS64BITSSET)
2168                                     len = NFS64BITSSET;
2169                                 else
2170                                     len = lop->nfslo_end - lop->nfslo_first;
2171                                 error = nfscl_trylock(nmp, NULL,
2172                                     op->nfso_fh, op->nfso_fhlen, lp,
2173                                     firstlock, 1, lop->nfslo_first, len,
2174                                     lop->nfslo_type, tcred, p);
2175                                 if (error != 0)
2176                                     nfscl_freelock(lop, 0);
2177                                 else
2178                                     firstlock = 0;
2179                                 lop = nlop;
2180                             }
2181                             /* If no locks, but a lockowner, just delete it. */
2182                             if (LIST_EMPTY(&lp->nfsl_lock))
2183                                 nfscl_freelockowner(lp, 0);
2184                             lp = nlp;
2185                         }
2186                     } else if (error == NFSERR_NOGRACE && !recovered_one &&
2187                         NFSHASNFSV4N(nmp)) {
2188                         /*
2189                          * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
2190                          * actually end up here, since the client will do
2191                          * a recovery for NFSERR_BADSESSION, but will get
2192                          * an NFSERR_NOGRACE reply for the first "reclaim"
2193                          * attempt.
2194                          * So, call nfscl_expireclient() to recover the
2195                          * opens as best we can and then do a reclaim
2196                          * complete and return.
2197                          */
2198                         nfsrpc_reclaimcomplete(nmp, cred, p);
2199                         nfscl_expireclient(clp, nmp, tcred, p);
2200                         goto out;
2201                     }
2202                 }
2203                 if (error != 0 && error != NFSERR_BADSESSION)
2204                     nfscl_freeopen(op, 0);
2205                 op = nop;
2206             }
2207             owp = nowp;
2208         }
2209
2210         /*
2211          * Now, try and get any delegations not yet reclaimed by cobbling
2212          * to-gether an appropriate open.
2213          */
2214         nowp = NULL;
2215         dp = TAILQ_FIRST(&clp->nfsc_deleg);
2216         while (dp != NULL) {
2217             ndp = TAILQ_NEXT(dp, nfsdl_list);
2218             if ((dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) {
2219                 if (nowp == NULL) {
2220                     nowp = malloc(
2221                         sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK);
2222                     /*
2223                      * Name must be as long an largest possible
2224                      * NFSV4CL_LOCKNAMELEN. 12 for now.
2225                      */
2226                     NFSBCOPY("RECLAIMDELEG", nowp->nfsow_owner,
2227                         NFSV4CL_LOCKNAMELEN);
2228                     LIST_INIT(&nowp->nfsow_open);
2229                     nowp->nfsow_clp = clp;
2230                     nowp->nfsow_seqid = 0;
2231                     nowp->nfsow_defunct = 0;
2232                     nfscl_lockinit(&nowp->nfsow_rwlock);
2233                 }
2234                 nop = NULL;
2235                 if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2236                     nop = malloc(sizeof (struct nfsclopen) +
2237                         dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
2238                     nop->nfso_own = nowp;
2239                     if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2240                         nop->nfso_mode = NFSV4OPEN_ACCESSWRITE;
2241                         delegtype = NFSV4OPEN_DELEGATEWRITE;
2242                     } else {
2243                         nop->nfso_mode = NFSV4OPEN_ACCESSREAD;
2244                         delegtype = NFSV4OPEN_DELEGATEREAD;
2245                     }
2246                     nop->nfso_opencnt = 0;
2247                     nop->nfso_posixlock = 1;
2248                     nop->nfso_fhlen = dp->nfsdl_fhlen;
2249                     NFSBCOPY(dp->nfsdl_fh, nop->nfso_fh, dp->nfsdl_fhlen);
2250                     LIST_INIT(&nop->nfso_lock);
2251                     nop->nfso_stateid.seqid = 0;
2252                     nop->nfso_stateid.other[0] = 0;
2253                     nop->nfso_stateid.other[1] = 0;
2254                     nop->nfso_stateid.other[2] = 0;
2255                     newnfs_copycred(&dp->nfsdl_cred, tcred);
2256                     newnfs_copyincred(tcred, &nop->nfso_cred);
2257                     tdp = NULL;
2258                     error = nfscl_tryopen(nmp, NULL, nop->nfso_fh,
2259                         nop->nfso_fhlen, nop->nfso_fh, nop->nfso_fhlen,
2260                         nop->nfso_mode, nop, NULL, 0, &tdp, 1,
2261                         delegtype, tcred, p);
2262                     if (tdp != NULL) {
2263                         if ((tdp->nfsdl_flags & NFSCLDL_WRITE))
2264                             mode = NFSV4OPEN_ACCESSWRITE;
2265                         else
2266                             mode = NFSV4OPEN_ACCESSREAD;
2267                         if ((nop->nfso_mode & mode) == mode &&
2268                             nop->nfso_fhlen == tdp->nfsdl_fhlen &&
2269                             !NFSBCMP(nop->nfso_fh, tdp->nfsdl_fh,
2270                             nop->nfso_fhlen)) {
2271                             dp->nfsdl_stateid = tdp->nfsdl_stateid;
2272                             dp->nfsdl_sizelimit = tdp->nfsdl_sizelimit;
2273                             dp->nfsdl_ace = tdp->nfsdl_ace;
2274                             dp->nfsdl_change = tdp->nfsdl_change;
2275                             dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2276                             if ((tdp->nfsdl_flags & NFSCLDL_RECALL))
2277                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
2278                             free(tdp, M_NFSCLDELEG);
2279                         } else {
2280                             TAILQ_INSERT_HEAD(&extra_deleg, tdp, nfsdl_list);
2281                         }
2282                     }
2283                 }
2284                 if (error) {
2285                     if (nop != NULL)
2286                         free(nop, M_NFSCLOPEN);
2287                     if (error == NFSERR_NOGRACE && !recovered_one &&
2288                         NFSHASNFSV4N(nmp)) {
2289                         /*
2290                          * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
2291                          * actually end up here, since the client will do
2292                          * a recovery for NFSERR_BADSESSION, but will get
2293                          * an NFSERR_NOGRACE reply for the first "reclaim"
2294                          * attempt.
2295                          * So, call nfscl_expireclient() to recover the
2296                          * opens as best we can and then do a reclaim
2297                          * complete and return.
2298                          */
2299                         nfsrpc_reclaimcomplete(nmp, cred, p);
2300                         nfscl_expireclient(clp, nmp, tcred, p);
2301                         free(nowp, M_NFSCLOWNER);
2302                         goto out;
2303                     }
2304                     /*
2305                      * Couldn't reclaim it, so throw the state
2306                      * away. Ouch!!
2307                      */
2308                     nfscl_cleandeleg(dp);
2309                     nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
2310                 } else {
2311                     recovered_one = true;
2312                     LIST_INSERT_HEAD(&extra_open, nop, nfso_list);
2313                 }
2314             }
2315             dp = ndp;
2316         }
2317
2318         /*
2319          * Now, get rid of extra Opens and Delegations.
2320          */
2321         LIST_FOREACH_SAFE(op, &extra_open, nfso_list, nop) {
2322                 do {
2323                         newnfs_copycred(&op->nfso_cred, tcred);
2324                         error = nfscl_tryclose(op, tcred, nmp, p);
2325                         if (error == NFSERR_GRACE)
2326                                 (void) nfs_catnap(PZERO, error, "nfsexcls");
2327                 } while (error == NFSERR_GRACE);
2328                 LIST_REMOVE(op, nfso_list);
2329                 free(op, M_NFSCLOPEN);
2330         }
2331         if (nowp != NULL)
2332                 free(nowp, M_NFSCLOWNER);
2333
2334         TAILQ_FOREACH_SAFE(dp, &extra_deleg, nfsdl_list, ndp) {
2335                 do {
2336                         newnfs_copycred(&dp->nfsdl_cred, tcred);
2337                         error = nfscl_trydelegreturn(dp, tcred, nmp, p);
2338                         if (error == NFSERR_GRACE)
2339                                 (void) nfs_catnap(PZERO, error, "nfsexdlg");
2340                 } while (error == NFSERR_GRACE);
2341                 TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list);
2342                 free(dp, M_NFSCLDELEG);
2343         }
2344
2345         /* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */
2346         if (NFSHASNFSV4N(nmp))
2347                 (void)nfsrpc_reclaimcomplete(nmp, cred, p);
2348
2349 out:
2350         NFSLOCKCLSTATE();
2351         clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG;
2352         wakeup(&clp->nfsc_flags);
2353         nfsv4_unlock(&clp->nfsc_lock, 0);
2354         NFSUNLOCKCLSTATE();
2355         if (tcred != NULL)
2356                 NFSFREECRED(tcred);
2357 }
2358
2359 /*
2360  * This function is called when a server replies with NFSERR_EXPIRED.
2361  * It deletes all state for the client and does a fresh SetClientId/confirm.
2362  * XXX Someday it should post a signal to the process(es) that hold the
2363  * state, so they know that lock state has been lost.
2364  */
2365 int
2366 nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p)
2367 {
2368         struct nfsmount *nmp;
2369         struct ucred *cred;
2370         int igotlock = 0, error, trycnt;
2371
2372         /*
2373          * If the clientid has gone away or a new SetClientid has already
2374          * been done, just return ok.
2375          */
2376         if (clp == NULL || clidrev != clp->nfsc_clientidrev)
2377                 return (0);
2378
2379         /*
2380          * First, lock the client structure, so everyone else will
2381          * block when trying to use state. Also, use NFSCLFLAGS_EXPIREIT so
2382          * that only one thread does the work.
2383          */
2384         NFSLOCKCLSTATE();
2385         clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT;
2386         do {
2387                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2388                     NFSCLSTATEMUTEXPTR, NULL);
2389         } while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT));
2390         if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) {
2391                 if (igotlock)
2392                         nfsv4_unlock(&clp->nfsc_lock, 0);
2393                 NFSUNLOCKCLSTATE();
2394                 return (0);
2395         }
2396         clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2397         NFSUNLOCKCLSTATE();
2398
2399         nmp = clp->nfsc_nmp;
2400         if (nmp == NULL)
2401                 panic("nfscl expired");
2402         cred = newnfs_getcred();
2403         trycnt = 5;
2404         do {
2405                 error = nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
2406         } while ((error == NFSERR_STALECLIENTID ||
2407              error == NFSERR_BADSESSION ||
2408              error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2409         if (error) {
2410                 NFSLOCKCLSTATE();
2411                 clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2412         } else {
2413                 /*
2414                  * Expire the state for the client.
2415                  */
2416                 nfscl_expireclient(clp, nmp, cred, p);
2417                 NFSLOCKCLSTATE();
2418                 clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2419                 clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2420         }
2421         clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG);
2422         wakeup(&clp->nfsc_flags);
2423         nfsv4_unlock(&clp->nfsc_lock, 0);
2424         NFSUNLOCKCLSTATE();
2425         NFSFREECRED(cred);
2426         return (error);
2427 }
2428
2429 /*
2430  * This function inserts a lock in the list after insert_lop.
2431  */
2432 static void
2433 nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop,
2434     struct nfscllock *insert_lop, int local)
2435 {
2436
2437         if ((struct nfscllockowner *)insert_lop == lp)
2438                 LIST_INSERT_HEAD(&lp->nfsl_lock, new_lop, nfslo_list);
2439         else
2440                 LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list);
2441         if (local)
2442                 nfsstatsv1.cllocallocks++;
2443         else
2444                 nfsstatsv1.cllocks++;
2445 }
2446
2447 /*
2448  * This function updates the locking for a lock owner and given file. It
2449  * maintains a list of lock ranges ordered on increasing file offset that
2450  * are NFSCLLOCK_READ or NFSCLLOCK_WRITE and non-overlapping (aka POSIX style).
2451  * It always adds new_lop to the list and sometimes uses the one pointed
2452  * at by other_lopp.
2453  * Returns 1 if the locks were modified, 0 otherwise.
2454  */
2455 static int
2456 nfscl_updatelock(struct nfscllockowner *lp, struct nfscllock **new_lopp,
2457     struct nfscllock **other_lopp, int local)
2458 {
2459         struct nfscllock *new_lop = *new_lopp;
2460         struct nfscllock *lop, *tlop, *ilop;
2461         struct nfscllock *other_lop;
2462         int unlock = 0, modified = 0;
2463         u_int64_t tmp;
2464
2465         /*
2466          * Work down the list until the lock is merged.
2467          */
2468         if (new_lop->nfslo_type == F_UNLCK)
2469                 unlock = 1;
2470         ilop = (struct nfscllock *)lp;
2471         lop = LIST_FIRST(&lp->nfsl_lock);
2472         while (lop != NULL) {
2473             /*
2474              * Only check locks for this file that aren't before the start of
2475              * new lock's range.
2476              */
2477             if (lop->nfslo_end >= new_lop->nfslo_first) {
2478                 if (new_lop->nfslo_end < lop->nfslo_first) {
2479                     /*
2480                      * If the new lock ends before the start of the
2481                      * current lock's range, no merge, just insert
2482                      * the new lock.
2483                      */
2484                     break;
2485                 }
2486                 if (new_lop->nfslo_type == lop->nfslo_type ||
2487                     (new_lop->nfslo_first <= lop->nfslo_first &&
2488                      new_lop->nfslo_end >= lop->nfslo_end)) {
2489                     /*
2490                      * This lock can be absorbed by the new lock/unlock.
2491                      * This happens when it covers the entire range
2492                      * of the old lock or is contiguous
2493                      * with the old lock and is of the same type or an
2494                      * unlock.
2495                      */
2496                     if (new_lop->nfslo_type != lop->nfslo_type ||
2497                         new_lop->nfslo_first != lop->nfslo_first ||
2498                         new_lop->nfslo_end != lop->nfslo_end)
2499                         modified = 1;
2500                     if (lop->nfslo_first < new_lop->nfslo_first)
2501                         new_lop->nfslo_first = lop->nfslo_first;
2502                     if (lop->nfslo_end > new_lop->nfslo_end)
2503                         new_lop->nfslo_end = lop->nfslo_end;
2504                     tlop = lop;
2505                     lop = LIST_NEXT(lop, nfslo_list);
2506                     nfscl_freelock(tlop, local);
2507                     continue;
2508                 }
2509
2510                 /*
2511                  * All these cases are for contiguous locks that are not the
2512                  * same type, so they can't be merged.
2513                  */
2514                 if (new_lop->nfslo_first <= lop->nfslo_first) {
2515                     /*
2516                      * This case is where the new lock overlaps with the
2517                      * first part of the old lock. Move the start of the
2518                      * old lock to just past the end of the new lock. The
2519                      * new lock will be inserted in front of the old, since
2520                      * ilop hasn't been updated. (We are done now.)
2521                      */
2522                     if (lop->nfslo_first != new_lop->nfslo_end) {
2523                         lop->nfslo_first = new_lop->nfslo_end;
2524                         modified = 1;
2525                     }
2526                     break;
2527                 }
2528                 if (new_lop->nfslo_end >= lop->nfslo_end) {
2529                     /*
2530                      * This case is where the new lock overlaps with the
2531                      * end of the old lock's range. Move the old lock's
2532                      * end to just before the new lock's first and insert
2533                      * the new lock after the old lock.
2534                      * Might not be done yet, since the new lock could
2535                      * overlap further locks with higher ranges.
2536                      */
2537                     if (lop->nfslo_end != new_lop->nfslo_first) {
2538                         lop->nfslo_end = new_lop->nfslo_first;
2539                         modified = 1;
2540                     }
2541                     ilop = lop;
2542                     lop = LIST_NEXT(lop, nfslo_list);
2543                     continue;
2544                 }
2545                 /*
2546                  * The final case is where the new lock's range is in the
2547                  * middle of the current lock's and splits the current lock
2548                  * up. Use *other_lopp to handle the second part of the
2549                  * split old lock range. (We are done now.)
2550                  * For unlock, we use new_lop as other_lop and tmp, since
2551                  * other_lop and new_lop are the same for this case.
2552                  * We noted the unlock case above, so we don't need
2553                  * new_lop->nfslo_type any longer.
2554                  */
2555                 tmp = new_lop->nfslo_first;
2556                 if (unlock) {
2557                     other_lop = new_lop;
2558                     *new_lopp = NULL;
2559                 } else {
2560                     other_lop = *other_lopp;
2561                     *other_lopp = NULL;
2562                 }
2563                 other_lop->nfslo_first = new_lop->nfslo_end;
2564                 other_lop->nfslo_end = lop->nfslo_end;
2565                 other_lop->nfslo_type = lop->nfslo_type;
2566                 lop->nfslo_end = tmp;
2567                 nfscl_insertlock(lp, other_lop, lop, local);
2568                 ilop = lop;
2569                 modified = 1;
2570                 break;
2571             }
2572             ilop = lop;
2573             lop = LIST_NEXT(lop, nfslo_list);
2574             if (lop == NULL)
2575                 break;
2576         }
2577
2578         /*
2579          * Insert the new lock in the list at the appropriate place.
2580          */
2581         if (!unlock) {
2582                 nfscl_insertlock(lp, new_lop, ilop, local);
2583                 *new_lopp = NULL;
2584                 modified = 1;
2585         }
2586         return (modified);
2587 }
2588
2589 /*
2590  * This function must be run as a kernel thread.
2591  * It does Renew Ops and recovery, when required.
2592  */
2593 void
2594 nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p)
2595 {
2596         struct nfsclowner *owp, *nowp;
2597         struct nfsclopen *op;
2598         struct nfscllockowner *lp, *nlp;
2599         struct nfscldeleghead dh;
2600         struct nfscldeleg *dp, *ndp;
2601         struct ucred *cred;
2602         u_int32_t clidrev;
2603         int error, cbpathdown, islept, igotlock, ret, clearok;
2604         uint32_t recover_done_time = 0;
2605         time_t mytime;
2606         static time_t prevsec = 0;
2607         struct nfscllockownerfh *lfhp, *nlfhp;
2608         struct nfscllockownerfhhead lfh;
2609         struct nfscllayout *lyp, *nlyp;
2610         struct nfscldevinfo *dip, *ndip;
2611         struct nfscllayouthead rlh;
2612         struct nfsclrecalllayout *recallp;
2613         struct nfsclds *dsp;
2614         bool retok;
2615         struct mount *mp;
2616         vnode_t vp;
2617
2618         cred = newnfs_getcred();
2619         NFSLOCKCLSTATE();
2620         clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD;
2621         mp = clp->nfsc_nmp->nm_mountp;
2622         NFSUNLOCKCLSTATE();
2623         for(;;) {
2624                 newnfs_setroot(cred);
2625                 cbpathdown = 0;
2626                 if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) {
2627                         /*
2628                          * Only allow one full recover within 1/2 of the lease
2629                          * duration (nfsc_renew).
2630                          * retok is value/result.  If passed in set to true,
2631                          * it indicates only a CreateSession operation should
2632                          * be attempted.
2633                          * If it is returned true, it indicates that the
2634                          * recovery only required a CreateSession.
2635                          */
2636                         retok = true;
2637                         if (recover_done_time < NFSD_MONOSEC) {
2638                                 recover_done_time = NFSD_MONOSEC +
2639                                     clp->nfsc_renew;
2640                                 retok = false;
2641                         }
2642                         NFSCL_DEBUG(1, "Doing recovery, only "
2643                             "createsession=%d\n", retok);
2644                         nfscl_recover(clp, &retok, cred, p);
2645                 }
2646                 if (clp->nfsc_expire <= NFSD_MONOSEC &&
2647                     (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) {
2648                         clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
2649                         clidrev = clp->nfsc_clientidrev;
2650                         error = nfsrpc_renew(clp, NULL, cred, p);
2651                         if (error == NFSERR_CBPATHDOWN)
2652                             cbpathdown = 1;
2653                         else if (error == NFSERR_STALECLIENTID ||
2654                             error == NFSERR_BADSESSION) {
2655                             NFSLOCKCLSTATE();
2656                             clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2657                             NFSUNLOCKCLSTATE();
2658                         } else if (error == NFSERR_EXPIRED)
2659                             (void) nfscl_hasexpired(clp, clidrev, p);
2660                 }
2661
2662 checkdsrenew:
2663                 if (NFSHASNFSV4N(clp->nfsc_nmp)) {
2664                         /* Do renews for any DS sessions. */
2665                         NFSLOCKMNT(clp->nfsc_nmp);
2666                         /* Skip first entry, since the MDS is handled above. */
2667                         dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess);
2668                         if (dsp != NULL)
2669                                 dsp = TAILQ_NEXT(dsp, nfsclds_list);
2670                         while (dsp != NULL) {
2671                                 if (dsp->nfsclds_expire <= NFSD_MONOSEC &&
2672                                     dsp->nfsclds_sess.nfsess_defunct == 0) {
2673                                         dsp->nfsclds_expire = NFSD_MONOSEC +
2674                                             clp->nfsc_renew;
2675                                         NFSUNLOCKMNT(clp->nfsc_nmp);
2676                                         (void)nfsrpc_renew(clp, dsp, cred, p);
2677                                         goto checkdsrenew;
2678                                 }
2679                                 dsp = TAILQ_NEXT(dsp, nfsclds_list);
2680                         }
2681                         NFSUNLOCKMNT(clp->nfsc_nmp);
2682                 }
2683
2684                 TAILQ_INIT(&dh);
2685                 NFSLOCKCLSTATE();
2686                 if (cbpathdown)
2687                         /* It's a Total Recall! */
2688                         nfscl_totalrecall(clp);
2689
2690                 /*
2691                  * Now, handle defunct owners.
2692                  */
2693                 LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
2694                         if (LIST_EMPTY(&owp->nfsow_open)) {
2695                                 if (owp->nfsow_defunct != 0)
2696                                         nfscl_freeopenowner(owp, 0);
2697                         }
2698                 }
2699
2700                 /*
2701                  * Do the recall on any delegations. To avoid trouble, always
2702                  * come back up here after having slept.
2703                  */
2704                 igotlock = 0;
2705 tryagain:
2706                 dp = TAILQ_FIRST(&clp->nfsc_deleg);
2707                 while (dp != NULL) {
2708                         ndp = TAILQ_NEXT(dp, nfsdl_list);
2709                         if ((dp->nfsdl_flags & NFSCLDL_RECALL)) {
2710                                 /*
2711                                  * Wait for outstanding I/O ops to be done.
2712                                  */
2713                                 if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
2714                                     if (igotlock) {
2715                                         nfsv4_unlock(&clp->nfsc_lock, 0);
2716                                         igotlock = 0;
2717                                     }
2718                                     dp->nfsdl_rwlock.nfslock_lock |=
2719                                         NFSV4LOCK_WANTED;
2720                                     msleep(&dp->nfsdl_rwlock,
2721                                         NFSCLSTATEMUTEXPTR, PVFS, "nfscld",
2722                                         5 * hz);
2723                                     if (NFSCL_FORCEDISM(mp))
2724                                         goto terminate;
2725                                     goto tryagain;
2726                                 }
2727                                 while (!igotlock) {
2728                                     igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
2729                                         &islept, NFSCLSTATEMUTEXPTR, mp);
2730                                     if (igotlock == 0 && NFSCL_FORCEDISM(mp))
2731                                         goto terminate;
2732                                     if (islept)
2733                                         goto tryagain;
2734                                 }
2735                                 NFSUNLOCKCLSTATE();
2736                                 newnfs_copycred(&dp->nfsdl_cred, cred);
2737                                 ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp,
2738                                     NULL, cred, p, 1, &vp);
2739                                 if (!ret) {
2740                                     nfscl_cleandeleg(dp);
2741                                     TAILQ_REMOVE(&clp->nfsc_deleg, dp,
2742                                         nfsdl_list);
2743                                     LIST_REMOVE(dp, nfsdl_hash);
2744                                     TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2745                                     nfscl_delegcnt--;
2746                                     nfsstatsv1.cldelegates--;
2747                                 }
2748                                 NFSLOCKCLSTATE();
2749                                 /*
2750                                  * The nfsc_lock must be released before doing
2751                                  * vrele(), since it might call nfs_inactive().
2752                                  * For the unlikely case where the vnode failed
2753                                  * to be acquired by nfscl_recalldeleg(), a
2754                                  * VOP_RECLAIM() should be in progress and it
2755                                  * will return the delegation.
2756                                  */
2757                                 nfsv4_unlock(&clp->nfsc_lock, 0);
2758                                 igotlock = 0;
2759                                 if (vp != NULL) {
2760                                         NFSUNLOCKCLSTATE();
2761                                         vrele(vp);
2762                                         NFSLOCKCLSTATE();
2763                                 }
2764                                 goto tryagain;
2765                         }
2766                         dp = ndp;
2767                 }
2768
2769                 /*
2770                  * Clear out old delegations, if we are above the high water
2771                  * mark. Only clear out ones with no state related to them.
2772                  * The tailq list is in LRU order.
2773                  */
2774                 dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead);
2775                 while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) {
2776                     ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list);
2777                     if (dp->nfsdl_rwlock.nfslock_usecnt == 0 &&
2778                         dp->nfsdl_rwlock.nfslock_lock == 0 &&
2779                         dp->nfsdl_timestamp < NFSD_MONOSEC &&
2780                         (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_ZAPPED |
2781                           NFSCLDL_NEEDRECLAIM | NFSCLDL_DELEGRET)) == 0) {
2782                         clearok = 1;
2783                         LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
2784                             op = LIST_FIRST(&owp->nfsow_open);
2785                             if (op != NULL) {
2786                                 clearok = 0;
2787                                 break;
2788                             }
2789                         }
2790                         if (clearok) {
2791                             LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
2792                                 if (!LIST_EMPTY(&lp->nfsl_lock)) {
2793                                     clearok = 0;
2794                                     break;
2795                                 }
2796                             }
2797                         }
2798                         if (clearok) {
2799                             TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
2800                             LIST_REMOVE(dp, nfsdl_hash);
2801                             TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2802                             nfscl_delegcnt--;
2803                             nfsstatsv1.cldelegates--;
2804                         }
2805                     }
2806                     dp = ndp;
2807                 }
2808                 if (igotlock)
2809                         nfsv4_unlock(&clp->nfsc_lock, 0);
2810
2811                 /*
2812                  * Do the recall on any layouts. To avoid trouble, always
2813                  * come back up here after having slept.
2814                  */
2815                 TAILQ_INIT(&rlh);
2816 tryagain2:
2817                 TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) {
2818                         if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) {
2819                                 /*
2820                                  * Wait for outstanding I/O ops to be done.
2821                                  */
2822                                 if (lyp->nfsly_lock.nfslock_usecnt > 0 ||
2823                                     (lyp->nfsly_lock.nfslock_lock &
2824                                      NFSV4LOCK_LOCK) != 0) {
2825                                         lyp->nfsly_lock.nfslock_lock |=
2826                                             NFSV4LOCK_WANTED;
2827                                         msleep(&lyp->nfsly_lock.nfslock_lock,
2828                                             NFSCLSTATEMUTEXPTR, PVFS, "nfslyp",
2829                                             5 * hz);
2830                                         if (NFSCL_FORCEDISM(mp))
2831                                             goto terminate;
2832                                         goto tryagain2;
2833                                 }
2834                                 /* Move the layout to the recall list. */
2835                                 TAILQ_REMOVE(&clp->nfsc_layout, lyp,
2836                                     nfsly_list);
2837                                 LIST_REMOVE(lyp, nfsly_hash);
2838                                 TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list);
2839
2840                                 /* Handle any layout commits. */
2841                                 if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) &&
2842                                     (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
2843                                         lyp->nfsly_flags &= ~NFSLY_WRITTEN;
2844                                         NFSUNLOCKCLSTATE();
2845                                         NFSCL_DEBUG(3, "do layoutcommit\n");
2846                                         nfscl_dolayoutcommit(clp->nfsc_nmp, lyp,
2847                                             cred, p);
2848                                         NFSLOCKCLSTATE();
2849                                         goto tryagain2;
2850                                 }
2851                         }
2852                 }
2853
2854                 /* Now, look for stale layouts. */
2855                 lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead);
2856                 while (lyp != NULL) {
2857                         nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list);
2858                         if (lyp->nfsly_timestamp < NFSD_MONOSEC &&
2859                             (lyp->nfsly_flags & NFSLY_RECALL) == 0 &&
2860                             lyp->nfsly_lock.nfslock_usecnt == 0 &&
2861                             lyp->nfsly_lock.nfslock_lock == 0) {
2862                                 NFSCL_DEBUG(4, "ret stale lay=%d\n",
2863                                     nfscl_layoutcnt);
2864                                 recallp = malloc(sizeof(*recallp),
2865                                     M_NFSLAYRECALL, M_NOWAIT);
2866                                 if (recallp == NULL)
2867                                         break;
2868                                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE,
2869                                     lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX,
2870                                     lyp->nfsly_stateid.seqid, 0, 0, NULL,
2871                                     recallp);
2872                         }
2873                         lyp = nlyp;
2874                 }
2875
2876                 /*
2877                  * Free up any unreferenced device info structures.
2878                  */
2879                 LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) {
2880                         if (dip->nfsdi_layoutrefs == 0 &&
2881                             dip->nfsdi_refcnt == 0) {
2882                                 NFSCL_DEBUG(4, "freeing devinfo\n");
2883                                 LIST_REMOVE(dip, nfsdi_list);
2884                                 nfscl_freedevinfo(dip);
2885                         }
2886                 }
2887                 NFSUNLOCKCLSTATE();
2888
2889                 /* Do layout return(s), as required. */
2890                 TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) {
2891                         TAILQ_REMOVE(&rlh, lyp, nfsly_list);
2892                         NFSCL_DEBUG(4, "ret layout\n");
2893                         nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p);
2894                         nfscl_freelayout(lyp);
2895                 }
2896
2897                 /*
2898                  * Delegreturn any delegations cleaned out or recalled.
2899                  */
2900                 TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
2901                         newnfs_copycred(&dp->nfsdl_cred, cred);
2902                         (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
2903                         TAILQ_REMOVE(&dh, dp, nfsdl_list);
2904                         free(dp, M_NFSCLDELEG);
2905                 }
2906
2907                 SLIST_INIT(&lfh);
2908                 /*
2909                  * Call nfscl_cleanupkext() once per second to check for
2910                  * open/lock owners where the process has exited.
2911                  */
2912                 mytime = NFSD_MONOSEC;
2913                 if (prevsec != mytime) {
2914                         prevsec = mytime;
2915                         nfscl_cleanupkext(clp, &lfh);
2916                 }
2917
2918                 /*
2919                  * Do a ReleaseLockOwner for all lock owners where the
2920                  * associated process no longer exists, as found by
2921                  * nfscl_cleanupkext().
2922                  */
2923                 newnfs_setroot(cred);
2924                 SLIST_FOREACH_SAFE(lfhp, &lfh, nfslfh_list, nlfhp) {
2925                         LIST_FOREACH_SAFE(lp, &lfhp->nfslfh_lock, nfsl_list,
2926                             nlp) {
2927                                 (void)nfsrpc_rellockown(clp->nfsc_nmp, lp,
2928                                     lfhp->nfslfh_fh, lfhp->nfslfh_len, cred,
2929                                     p);
2930                                 nfscl_freelockowner(lp, 0);
2931                         }
2932                         free(lfhp, M_TEMP);
2933                 }
2934                 SLIST_INIT(&lfh);
2935
2936                 NFSLOCKCLSTATE();
2937                 if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0)
2938                         (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl",
2939                             hz);
2940 terminate:
2941                 if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) {
2942                         clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD;
2943                         NFSUNLOCKCLSTATE();
2944                         NFSFREECRED(cred);
2945                         wakeup((caddr_t)clp);
2946                         return;
2947                 }
2948                 NFSUNLOCKCLSTATE();
2949         }
2950 }
2951
2952 /*
2953  * Initiate state recovery. Called when NFSERR_STALECLIENTID,
2954  * NFSERR_STALESTATEID or NFSERR_BADSESSION is received.
2955  */
2956 void
2957 nfscl_initiate_recovery(struct nfsclclient *clp)
2958 {
2959
2960         if (clp == NULL)
2961                 return;
2962         NFSLOCKCLSTATE();
2963         clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2964         NFSUNLOCKCLSTATE();
2965         wakeup((caddr_t)clp);
2966 }
2967
2968 /*
2969  * Dump out the state stuff for debugging.
2970  */
2971 void
2972 nfscl_dumpstate(struct nfsmount *nmp, int openowner, int opens,
2973     int lockowner, int locks)
2974 {
2975         struct nfsclclient *clp;
2976         struct nfsclowner *owp;
2977         struct nfsclopen *op;
2978         struct nfscllockowner *lp;
2979         struct nfscllock *lop;
2980         struct nfscldeleg *dp;
2981
2982         clp = nmp->nm_clp;
2983         if (clp == NULL) {
2984                 printf("nfscl dumpstate NULL clp\n");
2985                 return;
2986         }
2987         NFSLOCKCLSTATE();
2988         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2989           LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
2990             if (openowner && !LIST_EMPTY(&owp->nfsow_open))
2991                 printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
2992                     owp->nfsow_owner[0], owp->nfsow_owner[1],
2993                     owp->nfsow_owner[2], owp->nfsow_owner[3],
2994                     owp->nfsow_seqid);
2995             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
2996                 if (opens)
2997                     printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
2998                         op->nfso_stateid.other[0], op->nfso_stateid.other[1],
2999                         op->nfso_stateid.other[2], op->nfso_opencnt,
3000                         op->nfso_fh[12]);
3001                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
3002                     if (lockowner)
3003                         printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
3004                             lp->nfsl_owner[0], lp->nfsl_owner[1],
3005                             lp->nfsl_owner[2], lp->nfsl_owner[3],
3006                             lp->nfsl_seqid,
3007                             lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
3008                             lp->nfsl_stateid.other[2]);
3009                     LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3010                         if (locks)
3011 #ifdef __FreeBSD__
3012                             printf("lck typ=%d fst=%ju end=%ju\n",
3013                                 lop->nfslo_type, (intmax_t)lop->nfslo_first,
3014                                 (intmax_t)lop->nfslo_end);
3015 #else
3016                             printf("lck typ=%d fst=%qd end=%qd\n",
3017                                 lop->nfslo_type, lop->nfslo_first,
3018                                 lop->nfslo_end);
3019 #endif
3020                     }
3021                 }
3022             }
3023           }
3024         }
3025         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3026             if (openowner && !LIST_EMPTY(&owp->nfsow_open))
3027                 printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
3028                     owp->nfsow_owner[0], owp->nfsow_owner[1],
3029                     owp->nfsow_owner[2], owp->nfsow_owner[3],
3030                     owp->nfsow_seqid);
3031             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3032                 if (opens)
3033                     printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
3034                         op->nfso_stateid.other[0], op->nfso_stateid.other[1],
3035                         op->nfso_stateid.other[2], op->nfso_opencnt,
3036                         op->nfso_fh[12]);
3037                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
3038                     if (lockowner)
3039                         printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
3040                             lp->nfsl_owner[0], lp->nfsl_owner[1],
3041                             lp->nfsl_owner[2], lp->nfsl_owner[3],
3042                             lp->nfsl_seqid,
3043                             lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
3044                             lp->nfsl_stateid.other[2]);
3045                     LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3046                         if (locks)
3047 #ifdef __FreeBSD__
3048                             printf("lck typ=%d fst=%ju end=%ju\n",
3049                                 lop->nfslo_type, (intmax_t)lop->nfslo_first,
3050                                 (intmax_t)lop->nfslo_end);
3051 #else
3052                             printf("lck typ=%d fst=%qd end=%qd\n",
3053                                 lop->nfslo_type, lop->nfslo_first,
3054                                 lop->nfslo_end);
3055 #endif
3056                     }
3057                 }
3058             }
3059         }
3060         NFSUNLOCKCLSTATE();
3061 }
3062
3063 /*
3064  * Check for duplicate open owners and opens.
3065  * (Only used as a diagnostic aid.)
3066  */
3067 void
3068 nfscl_dupopen(vnode_t vp, int dupopens)
3069 {
3070         struct nfsclclient *clp;
3071         struct nfsclowner *owp, *owp2;
3072         struct nfsclopen *op, *op2;
3073         struct nfsfh *nfhp;
3074
3075         clp = VFSTONFS(vp->v_mount)->nm_clp;
3076         if (clp == NULL) {
3077                 printf("nfscl dupopen NULL clp\n");
3078                 return;
3079         }
3080         nfhp = VTONFS(vp)->n_fhp;
3081         NFSLOCKCLSTATE();
3082
3083         /*
3084          * First, search for duplicate owners.
3085          * These should never happen!
3086          */
3087         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3088             LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3089                 if (owp != owp2 &&
3090                     !NFSBCMP(owp->nfsow_owner, owp2->nfsow_owner,
3091                     NFSV4CL_LOCKNAMELEN)) {
3092                         NFSUNLOCKCLSTATE();
3093                         printf("DUP OWNER\n");
3094                         nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0, 0);
3095                         return;
3096                 }
3097             }
3098         }
3099
3100         /*
3101          * Now, search for duplicate stateids.
3102          * These shouldn't happen, either.
3103          */
3104         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3105             LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3106                 LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3107                     LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3108                         if (op != op2 &&
3109                             (op->nfso_stateid.other[0] != 0 ||
3110                              op->nfso_stateid.other[1] != 0 ||
3111                              op->nfso_stateid.other[2] != 0) &&
3112                             op->nfso_stateid.other[0] == op2->nfso_stateid.other[0] &&
3113                             op->nfso_stateid.other[1] == op2->nfso_stateid.other[1] &&
3114                             op->nfso_stateid.other[2] == op2->nfso_stateid.other[2]) {
3115                             NFSUNLOCKCLSTATE();
3116                             printf("DUP STATEID\n");
3117                             nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0, 0);
3118                             return;
3119                         }
3120                     }
3121                 }
3122             }
3123         }
3124
3125         /*
3126          * Now search for duplicate opens.
3127          * Duplicate opens for the same owner
3128          * should never occur. Other duplicates are
3129          * possible and are checked for if "dupopens"
3130          * is true.
3131          */
3132         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3133             LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3134                 if (nfhp->nfh_len == op2->nfso_fhlen &&
3135                     !NFSBCMP(nfhp->nfh_fh, op2->nfso_fh, nfhp->nfh_len)) {
3136                     LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3137                         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3138                             if (op != op2 && nfhp->nfh_len == op->nfso_fhlen &&
3139                                 !NFSBCMP(nfhp->nfh_fh, op->nfso_fh, nfhp->nfh_len) &&
3140                                 (!NFSBCMP(op->nfso_own->nfsow_owner,
3141                                  op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN) ||
3142                                  dupopens)) {
3143                                 if (!NFSBCMP(op->nfso_own->nfsow_owner,
3144                                     op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
3145                                     NFSUNLOCKCLSTATE();
3146                                     printf("BADDUP OPEN\n");
3147                                 } else {
3148                                     NFSUNLOCKCLSTATE();
3149                                     printf("DUP OPEN\n");
3150                                 }
3151                                 nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0,
3152                                     0);
3153                                 return;
3154                             }
3155                         }
3156                     }
3157                 }
3158             }
3159         }
3160         NFSUNLOCKCLSTATE();
3161 }
3162
3163 /*
3164  * During close, find an open that needs to be dereferenced and
3165  * dereference it. If there are no more opens for this file,
3166  * log a message to that effect.
3167  * Opens aren't actually Close'd until VOP_INACTIVE() is performed
3168  * on the file's vnode.
3169  * This is the safe way, since it is difficult to identify
3170  * which open the close is for and I/O can be performed after the
3171  * close(2) system call when a file is mmap'd.
3172  * If it returns 0 for success, there will be a referenced
3173  * clp returned via clpp.
3174  */
3175 int
3176 nfscl_getclose(vnode_t vp, struct nfsclclient **clpp)
3177 {
3178         struct nfsclclient *clp;
3179         struct nfsclowner *owp;
3180         struct nfsclopen *op;
3181         struct nfscldeleg *dp;
3182         struct nfsfh *nfhp;
3183         int error, notdecr;
3184
3185         error = nfscl_getcl(vp->v_mount, NULL, NULL, 1, &clp);
3186         if (error)
3187                 return (error);
3188         *clpp = clp;
3189
3190         nfhp = VTONFS(vp)->n_fhp;
3191         notdecr = 1;
3192         NFSLOCKCLSTATE();
3193         /*
3194          * First, look for one under a delegation that was locally issued
3195          * and just decrement the opencnt for it. Since all my Opens against
3196          * the server are DENY_NONE, I don't see a problem with hanging
3197          * onto them. (It is much easier to use one of the extant Opens
3198          * that I already have on the server when a Delegation is recalled
3199          * than to do fresh Opens.) Someday, I might need to rethink this, but.
3200          */
3201         dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3202         if (dp != NULL) {
3203                 LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3204                         op = LIST_FIRST(&owp->nfsow_open);
3205                         if (op != NULL) {
3206                                 /*
3207                                  * Since a delegation is for a file, there
3208                                  * should never be more than one open for
3209                                  * each openowner.
3210                                  */
3211                                 if (LIST_NEXT(op, nfso_list) != NULL)
3212                                         panic("nfscdeleg opens");
3213                                 if (notdecr && op->nfso_opencnt > 0) {
3214                                         notdecr = 0;
3215                                         op->nfso_opencnt--;
3216                                         break;
3217                                 }
3218                         }
3219                 }
3220         }
3221
3222         /* Now process the opens against the server. */
3223         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3224                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3225                         if (op->nfso_fhlen == nfhp->nfh_len &&
3226                             !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3227                             nfhp->nfh_len)) {
3228                                 /* Found an open, decrement cnt if possible */
3229                                 if (notdecr && op->nfso_opencnt > 0) {
3230                                         notdecr = 0;
3231                                         op->nfso_opencnt--;
3232                                 }
3233                                 /*
3234                                  * There are more opens, so just return.
3235                                  */
3236                                 if (op->nfso_opencnt > 0) {
3237                                         NFSUNLOCKCLSTATE();
3238                                         return (0);
3239                                 }
3240                         }
3241                 }
3242         }
3243         NFSUNLOCKCLSTATE();
3244         if (notdecr)
3245                 printf("nfscl: never fnd open\n");
3246         return (0);
3247 }
3248
3249 int
3250 nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p)
3251 {
3252         struct nfsclclient *clp;
3253         struct nfsclowner *owp, *nowp;
3254         struct nfsclopen *op;
3255         struct nfscldeleg *dp;
3256         struct nfsfh *nfhp;
3257         struct nfsclrecalllayout *recallp;
3258         int error;
3259
3260         error = nfscl_getcl(vp->v_mount, NULL, NULL, 1, &clp);
3261         if (error)
3262                 return (error);
3263         *clpp = clp;
3264
3265         nfhp = VTONFS(vp)->n_fhp;
3266         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
3267         NFSLOCKCLSTATE();
3268         /*
3269          * First get rid of the local Open structures, which should be no
3270          * longer in use.
3271          */
3272         dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3273         if (dp != NULL) {
3274                 LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
3275                         op = LIST_FIRST(&owp->nfsow_open);
3276                         if (op != NULL) {
3277                                 KASSERT((op->nfso_opencnt == 0),
3278                                     ("nfscl: bad open cnt on deleg"));
3279                                 nfscl_freeopen(op, 1);
3280                         }
3281                         nfscl_freeopenowner(owp, 1);
3282                 }
3283         }
3284
3285         /* Return any layouts marked return on close. */
3286         nfscl_retoncloselayout(vp, clp, nfhp->nfh_fh, nfhp->nfh_len, &recallp);
3287
3288         /* Now process the opens against the server. */
3289 lookformore:
3290         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3291                 op = LIST_FIRST(&owp->nfsow_open);
3292                 while (op != NULL) {
3293                         if (op->nfso_fhlen == nfhp->nfh_len &&
3294                             !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3295                             nfhp->nfh_len)) {
3296                                 /* Found an open, close it. */
3297 #ifdef DIAGNOSTIC
3298                                 KASSERT((op->nfso_opencnt == 0),
3299                                     ("nfscl: bad open cnt on server (%d)",
3300                                      op->nfso_opencnt));
3301 #endif
3302                                 NFSUNLOCKCLSTATE();
3303                                 nfsrpc_doclose(VFSTONFS(vp->v_mount), op, p);
3304                                 NFSLOCKCLSTATE();
3305                                 goto lookformore;
3306                         }
3307                         op = LIST_NEXT(op, nfso_list);
3308                 }
3309         }
3310         NFSUNLOCKCLSTATE();
3311         /*
3312          * recallp has been set NULL by nfscl_retoncloselayout() if it was
3313          * used by the function, but calling free() with a NULL pointer is ok.
3314          */
3315         free(recallp, M_NFSLAYRECALL);
3316         return (0);
3317 }
3318
3319 /*
3320  * Return all delegations on this client.
3321  * (Must be called with client sleep lock.)
3322  */
3323 static void
3324 nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p)
3325 {
3326         struct nfscldeleg *dp, *ndp;
3327         struct ucred *cred;
3328
3329         cred = newnfs_getcred();
3330         TAILQ_FOREACH_SAFE(dp, &clp->nfsc_deleg, nfsdl_list, ndp) {
3331                 nfscl_cleandeleg(dp);
3332                 (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3333                 nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
3334         }
3335         NFSFREECRED(cred);
3336 }
3337
3338 /*
3339  * Return any delegation for this vp.
3340  */
3341 void
3342 nfscl_delegreturnvp(vnode_t vp, NFSPROC_T *p)
3343 {
3344         struct nfsclclient *clp;
3345         struct nfscldeleg *dp;
3346         struct ucred *cred;
3347         struct nfsnode *np;
3348
3349         np = VTONFS(vp);
3350         cred = newnfs_getcred();
3351         dp = NULL;
3352         NFSLOCKCLSTATE();
3353         clp = VFSTONFS(vp->v_mount)->nm_clp;
3354         if (clp != NULL)
3355                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
3356                     np->n_fhp->nfh_len);
3357         if (dp != NULL) {
3358                 nfscl_cleandeleg(dp);
3359                 nfscl_freedeleg(&clp->nfsc_deleg, dp, false);
3360                 NFSUNLOCKCLSTATE();
3361                 newnfs_copycred(&dp->nfsdl_cred, cred);
3362                 nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3363                 free(dp, M_NFSCLDELEG);
3364         } else
3365                 NFSUNLOCKCLSTATE();
3366         NFSFREECRED(cred);
3367 }
3368
3369 /*
3370  * Do a callback RPC.
3371  */
3372 void
3373 nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
3374 {
3375         int clist, gotseq_ok, i, j, k, op, rcalls;
3376         u_int32_t *tl;
3377         struct nfsclclient *clp;
3378         struct nfscldeleg *dp = NULL;
3379         int numops, taglen = -1, error = 0, trunc __unused;
3380         u_int32_t minorvers = 0, retops = 0, *retopsp = NULL, *repp, cbident;
3381         u_char tag[NFSV4_SMALLSTR + 1], *tagstr;
3382         vnode_t vp = NULL;
3383         struct nfsnode *np;
3384         struct vattr va;
3385         struct nfsfh *nfhp;
3386         mount_t mp;
3387         nfsattrbit_t attrbits, rattrbits;
3388         nfsv4stateid_t stateid;
3389         uint32_t seqid, slotid = 0, highslot, cachethis __unused;
3390         uint8_t sessionid[NFSX_V4SESSIONID];
3391         struct mbuf *rep;
3392         struct nfscllayout *lyp;
3393         uint64_t filesid[2], len, off;
3394         int changed, gotone, laytype, recalltype;
3395         uint32_t iomode;
3396         struct nfsclrecalllayout *recallp = NULL;
3397         struct nfsclsession *tsep;
3398
3399         gotseq_ok = 0;
3400         nfsrvd_rephead(nd);
3401         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3402         taglen = fxdr_unsigned(int, *tl);
3403         if (taglen < 0) {
3404                 error = EBADRPC;
3405                 goto nfsmout;
3406         }
3407         if (taglen <= NFSV4_SMALLSTR)
3408                 tagstr = tag;
3409         else
3410                 tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK);
3411         error = nfsrv_mtostr(nd, tagstr, taglen);
3412         if (error) {
3413                 if (taglen > NFSV4_SMALLSTR)
3414                         free(tagstr, M_TEMP);
3415                 taglen = -1;
3416                 goto nfsmout;
3417         }
3418         (void) nfsm_strtom(nd, tag, taglen);
3419         if (taglen > NFSV4_SMALLSTR) {
3420                 free(tagstr, M_TEMP);
3421         }
3422         NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED);
3423         NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3424         minorvers = fxdr_unsigned(u_int32_t, *tl++);
3425         if (minorvers != NFSV4_MINORVERSION &&
3426             minorvers != NFSV41_MINORVERSION &&
3427             minorvers != NFSV42_MINORVERSION)
3428                 nd->nd_repstat = NFSERR_MINORVERMISMATCH;
3429         cbident = fxdr_unsigned(u_int32_t, *tl++);
3430         if (nd->nd_repstat)
3431                 numops = 0;
3432         else
3433                 numops = fxdr_unsigned(int, *tl);
3434         /*
3435          * Loop around doing the sub ops.
3436          */
3437         for (i = 0; i < numops; i++) {
3438                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3439                 NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED);
3440                 *repp++ = *tl;
3441                 op = fxdr_unsigned(int, *tl);
3442                 if (op < NFSV4OP_CBGETATTR ||
3443                    (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) ||
3444                    (op > NFSV4OP_CBNOTIFYDEVID &&
3445                     minorvers == NFSV41_MINORVERSION) ||
3446                    (op > NFSV4OP_CBOFFLOAD &&
3447                     minorvers == NFSV42_MINORVERSION)) {
3448                     nd->nd_repstat = NFSERR_OPILLEGAL;
3449                     *repp = nfscl_errmap(nd, minorvers);
3450                     retops++;
3451                     break;
3452                 }
3453                 nd->nd_procnum = op;
3454                 if (op < NFSV42_CBNOPS)
3455                         nfsstatsv1.cbrpccnt[nd->nd_procnum]++;
3456                 switch (op) {
3457                 case NFSV4OP_CBGETATTR:
3458                         NFSCL_DEBUG(4, "cbgetattr\n");
3459                         mp = NULL;
3460                         vp = NULL;
3461                         error = nfsm_getfh(nd, &nfhp);
3462                         if (!error)
3463                                 error = nfsrv_getattrbits(nd, &attrbits,
3464                                     NULL, NULL);
3465                         if (error == 0 && i == 0 &&
3466                             minorvers != NFSV4_MINORVERSION)
3467                                 error = NFSERR_OPNOTINSESS;
3468                         if (!error) {
3469                                 mp = nfscl_getmnt(minorvers, sessionid, cbident,
3470                                     &clp);
3471                                 if (mp == NULL)
3472                                         error = NFSERR_SERVERFAULT;
3473                         }
3474                         if (!error) {
3475                                 error = nfscl_ngetreopen(mp, nfhp->nfh_fh,
3476                                     nfhp->nfh_len, p, &np);
3477                                 if (!error)
3478                                         vp = NFSTOV(np);
3479                         }
3480                         if (!error) {
3481                                 NFSZERO_ATTRBIT(&rattrbits);
3482                                 NFSLOCKCLSTATE();
3483                                 dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3484                                     nfhp->nfh_len);
3485                                 if (dp != NULL) {
3486                                         if (NFSISSET_ATTRBIT(&attrbits,
3487                                             NFSATTRBIT_SIZE)) {
3488                                                 if (vp != NULL)
3489                                                         va.va_size = np->n_size;
3490                                                 else
3491                                                         va.va_size =
3492                                                             dp->nfsdl_size;
3493                                                 NFSSETBIT_ATTRBIT(&rattrbits,
3494                                                     NFSATTRBIT_SIZE);
3495                                         }
3496                                         if (NFSISSET_ATTRBIT(&attrbits,
3497                                             NFSATTRBIT_CHANGE)) {
3498                                                 va.va_filerev =
3499                                                     dp->nfsdl_change;
3500                                                 if (vp == NULL ||
3501                                                     (np->n_flag & NDELEGMOD))
3502                                                         va.va_filerev++;
3503                                                 NFSSETBIT_ATTRBIT(&rattrbits,
3504                                                     NFSATTRBIT_CHANGE);
3505                                         }
3506                                 } else
3507                                         error = NFSERR_SERVERFAULT;
3508                                 NFSUNLOCKCLSTATE();
3509                         }
3510                         if (vp != NULL)
3511                                 vrele(vp);
3512                         if (mp != NULL)
3513                                 vfs_unbusy(mp);
3514                         if (nfhp != NULL)
3515                                 free(nfhp, M_NFSFH);
3516                         if (!error)
3517                                 (void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va,
3518                                     NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0,
3519                                     (uint64_t)0, NULL);
3520                         break;
3521                 case NFSV4OP_CBRECALL:
3522                         NFSCL_DEBUG(4, "cbrecall\n");
3523                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
3524                             NFSX_UNSIGNED);
3525                         stateid.seqid = *tl++;
3526                         NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other,
3527                             NFSX_STATEIDOTHER);
3528                         tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
3529                         trunc = fxdr_unsigned(int, *tl);
3530                         error = nfsm_getfh(nd, &nfhp);
3531                         if (error == 0 && i == 0 &&
3532                             minorvers != NFSV4_MINORVERSION)
3533                                 error = NFSERR_OPNOTINSESS;
3534                         if (!error) {
3535                                 NFSLOCKCLSTATE();
3536                                 if (minorvers == NFSV4_MINORVERSION)
3537                                         clp = nfscl_getclnt(cbident);
3538                                 else
3539                                         clp = nfscl_getclntsess(sessionid);
3540                                 if (clp != NULL) {
3541                                         dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3542                                             nfhp->nfh_len);
3543                                         if (dp != NULL && (dp->nfsdl_flags &
3544                                             NFSCLDL_DELEGRET) == 0) {
3545                                                 dp->nfsdl_flags |=
3546                                                     NFSCLDL_RECALL;
3547                                                 wakeup((caddr_t)clp);
3548                                         }
3549                                 } else {
3550                                         error = NFSERR_SERVERFAULT;
3551                                 }
3552                                 NFSUNLOCKCLSTATE();
3553                         }
3554                         if (nfhp != NULL)
3555                                 free(nfhp, M_NFSFH);
3556                         break;
3557                 case NFSV4OP_CBLAYOUTRECALL:
3558                         NFSCL_DEBUG(4, "cblayrec\n");
3559                         nfhp = NULL;
3560                         NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
3561                         laytype = fxdr_unsigned(int, *tl++);
3562                         iomode = fxdr_unsigned(uint32_t, *tl++);
3563                         if (newnfs_true == *tl++)
3564                                 changed = 1;
3565                         else
3566                                 changed = 0;
3567                         recalltype = fxdr_unsigned(int, *tl);
3568                         NFSCL_DEBUG(4, "layt=%d iom=%d ch=%d rectyp=%d\n",
3569                             laytype, iomode, changed, recalltype);
3570                         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL,
3571                             M_WAITOK);
3572                         if (laytype != NFSLAYOUT_NFSV4_1_FILES &&
3573                             laytype != NFSLAYOUT_FLEXFILE)
3574                                 error = NFSERR_NOMATCHLAYOUT;
3575                         else if (recalltype == NFSLAYOUTRETURN_FILE) {
3576                                 error = nfsm_getfh(nd, &nfhp);
3577                                 NFSCL_DEBUG(4, "retfile getfh=%d\n", error);
3578                                 if (error != 0)
3579                                         goto nfsmout;
3580                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER +
3581                                     NFSX_STATEID);
3582                                 off = fxdr_hyper(tl); tl += 2;
3583                                 len = fxdr_hyper(tl); tl += 2;
3584                                 stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
3585                                 NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
3586                                 if (minorvers == NFSV4_MINORVERSION)
3587                                         error = NFSERR_NOTSUPP;
3588                                 else if (i == 0)
3589                                         error = NFSERR_OPNOTINSESS;
3590                                 NFSCL_DEBUG(4, "off=%ju len=%ju sq=%u err=%d\n",
3591                                     (uintmax_t)off, (uintmax_t)len,
3592                                     stateid.seqid, error);
3593                                 if (error == 0) {
3594                                         NFSLOCKCLSTATE();
3595                                         clp = nfscl_getclntsess(sessionid);
3596                                         NFSCL_DEBUG(4, "cbly clp=%p\n", clp);
3597                                         if (clp != NULL) {
3598                                                 lyp = nfscl_findlayout(clp,
3599                                                     nfhp->nfh_fh,
3600                                                     nfhp->nfh_len);
3601                                                 NFSCL_DEBUG(4, "cblyp=%p\n",
3602                                                     lyp);
3603                                                 if (lyp != NULL &&
3604                                                     (lyp->nfsly_flags &
3605                                                      (NFSLY_FILES |
3606                                                       NFSLY_FLEXFILE)) != 0 &&
3607                                                     !NFSBCMP(stateid.other,
3608                                                     lyp->nfsly_stateid.other,
3609                                                     NFSX_STATEIDOTHER)) {
3610                                                         error =
3611                                                             nfscl_layoutrecall(
3612                                                             recalltype,
3613                                                             lyp, iomode, off,
3614                                                             len, stateid.seqid,
3615                                                             0, 0, NULL,
3616                                                             recallp);
3617                                                         if (error == 0 &&
3618                                                             stateid.seqid >
3619                                                             lyp->nfsly_stateid.seqid)
3620                                                                 lyp->nfsly_stateid.seqid =
3621                                                                     stateid.seqid;
3622                                                         recallp = NULL;
3623                                                         wakeup(clp);
3624                                                         NFSCL_DEBUG(4,
3625                                                             "aft layrcal=%d "
3626                                                             "layseqid=%d\n",
3627                                                             error,
3628                                                             lyp->nfsly_stateid.seqid);
3629                                                 } else
3630                                                         error =
3631                                                           NFSERR_NOMATCHLAYOUT;
3632                                         } else
3633                                                 error = NFSERR_NOMATCHLAYOUT;
3634                                         NFSUNLOCKCLSTATE();
3635                                 }
3636                                 free(nfhp, M_NFSFH);
3637                         } else if (recalltype == NFSLAYOUTRETURN_FSID) {
3638                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER);
3639                                 filesid[0] = fxdr_hyper(tl); tl += 2;
3640                                 filesid[1] = fxdr_hyper(tl); tl += 2;
3641                                 gotone = 0;
3642                                 NFSLOCKCLSTATE();
3643                                 clp = nfscl_getclntsess(sessionid);
3644                                 if (clp != NULL) {
3645                                         TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3646                                             nfsly_list) {
3647                                                 if (lyp->nfsly_filesid[0] ==
3648                                                     filesid[0] &&
3649                                                     lyp->nfsly_filesid[1] ==
3650                                                     filesid[1]) {
3651                                                         error =
3652                                                             nfscl_layoutrecall(
3653                                                             recalltype,
3654                                                             lyp, iomode, 0,
3655                                                             UINT64_MAX,
3656                                                             lyp->nfsly_stateid.seqid,
3657                                                             0, 0, NULL,
3658                                                             recallp);
3659                                                         recallp = NULL;
3660                                                         gotone = 1;
3661                                                 }
3662                                         }
3663                                         if (gotone != 0)
3664                                                 wakeup(clp);
3665                                         else
3666                                                 error = NFSERR_NOMATCHLAYOUT;
3667                                 } else
3668                                         error = NFSERR_NOMATCHLAYOUT;
3669                                 NFSUNLOCKCLSTATE();
3670                         } else if (recalltype == NFSLAYOUTRETURN_ALL) {
3671                                 gotone = 0;
3672                                 NFSLOCKCLSTATE();
3673                                 clp = nfscl_getclntsess(sessionid);
3674                                 if (clp != NULL) {
3675                                         TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3676                                             nfsly_list) {
3677                                                 error = nfscl_layoutrecall(
3678                                                     recalltype, lyp, iomode, 0,
3679                                                     UINT64_MAX,
3680                                                     lyp->nfsly_stateid.seqid,
3681                                                     0, 0, NULL, recallp);
3682                                                 recallp = NULL;
3683                                                 gotone = 1;
3684                                         }
3685                                         if (gotone != 0)
3686                                                 wakeup(clp);
3687                                         else
3688                                                 error = NFSERR_NOMATCHLAYOUT;
3689                                 } else
3690                                         error = NFSERR_NOMATCHLAYOUT;
3691                                 NFSUNLOCKCLSTATE();
3692                         } else
3693                                 error = NFSERR_NOMATCHLAYOUT;
3694                         if (recallp != NULL) {
3695                                 free(recallp, M_NFSLAYRECALL);
3696                                 recallp = NULL;
3697                         }
3698                         break;
3699                 case NFSV4OP_CBSEQUENCE:
3700                         NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3701                             5 * NFSX_UNSIGNED);
3702                         bcopy(tl, sessionid, NFSX_V4SESSIONID);
3703                         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3704                         seqid = fxdr_unsigned(uint32_t, *tl++);
3705                         slotid = fxdr_unsigned(uint32_t, *tl++);
3706                         highslot = fxdr_unsigned(uint32_t, *tl++);
3707                         cachethis = *tl++;
3708                         /* Throw away the referring call stuff. */
3709                         clist = fxdr_unsigned(int, *tl);
3710                         for (j = 0; j < clist; j++) {
3711                                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3712                                     NFSX_UNSIGNED);
3713                                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3714                                 rcalls = fxdr_unsigned(int, *tl);
3715                                 for (k = 0; k < rcalls; k++) {
3716                                         NFSM_DISSECT(tl, uint32_t *,
3717                                             2 * NFSX_UNSIGNED);
3718                                 }
3719                         }
3720                         NFSLOCKCLSTATE();
3721                         if (i == 0) {
3722                                 clp = nfscl_getclntsess(sessionid);
3723                                 if (clp == NULL)
3724                                         error = NFSERR_SERVERFAULT;
3725                         } else
3726                                 error = NFSERR_SEQUENCEPOS;
3727                         if (error == 0) {
3728                                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3729                                 error = nfsv4_seqsession(seqid, slotid,
3730                                     highslot, tsep->nfsess_cbslots, &rep,
3731                                     tsep->nfsess_backslots);
3732                         }
3733                         NFSUNLOCKCLSTATE();
3734                         if (error == 0 || error == NFSERR_REPLYFROMCACHE) {
3735                                 gotseq_ok = 1;
3736                                 if (rep != NULL) {
3737                                         /*
3738                                          * Handle a reply for a retried
3739                                          * callback.  The reply will be
3740                                          * re-inserted in the session cache
3741                                          * by the nfsv4_seqsess_cacherep() call
3742                                          * after out:
3743                                          */
3744                                         KASSERT(error == NFSERR_REPLYFROMCACHE,
3745                                             ("cbsequence: non-NULL rep"));
3746                                         NFSCL_DEBUG(4, "Got cbretry\n");
3747                                         m_freem(nd->nd_mreq);
3748                                         nd->nd_mreq = rep;
3749                                         rep = NULL;
3750                                         goto out;
3751                                 }
3752                                 NFSM_BUILD(tl, uint32_t *,
3753                                     NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED);
3754                                 bcopy(sessionid, tl, NFSX_V4SESSIONID);
3755                                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3756                                 *tl++ = txdr_unsigned(seqid);
3757                                 *tl++ = txdr_unsigned(slotid);
3758                                 *tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1);
3759                                 *tl = txdr_unsigned(NFSV4_CBSLOTS - 1);
3760                         }
3761                         break;
3762                 default:
3763                         if (i == 0 && minorvers != NFSV4_MINORVERSION)
3764                                 error = NFSERR_OPNOTINSESS;
3765                         else {
3766                                 NFSCL_DEBUG(1, "unsupp callback %d\n", op);
3767                                 error = NFSERR_NOTSUPP;
3768                         }
3769                         break;
3770                 }
3771                 if (error) {
3772                         if (error == EBADRPC || error == NFSERR_BADXDR) {
3773                                 nd->nd_repstat = NFSERR_BADXDR;
3774                         } else {
3775                                 nd->nd_repstat = error;
3776                         }
3777                         error = 0;
3778                 }
3779                 retops++;
3780                 if (nd->nd_repstat) {
3781                         *repp = nfscl_errmap(nd, minorvers);
3782                         break;
3783                 } else
3784                         *repp = 0;      /* NFS4_OK */
3785         }
3786 nfsmout:
3787         if (recallp != NULL)
3788                 free(recallp, M_NFSLAYRECALL);
3789         if (error) {
3790                 if (error == EBADRPC || error == NFSERR_BADXDR)
3791                         nd->nd_repstat = NFSERR_BADXDR;
3792                 else
3793                         printf("nfsv4 comperr1=%d\n", error);
3794         }
3795         if (taglen == -1) {
3796                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3797                 *tl++ = 0;
3798                 *tl = 0;
3799         } else {
3800                 *retopsp = txdr_unsigned(retops);
3801         }
3802         *nd->nd_errp = nfscl_errmap(nd, minorvers);
3803 out:
3804         if (gotseq_ok != 0) {
3805                 rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
3806                 NFSLOCKCLSTATE();
3807                 clp = nfscl_getclntsess(sessionid);
3808                 if (clp != NULL) {
3809                         tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3810                         nfsv4_seqsess_cacherep(slotid, tsep->nfsess_cbslots,
3811                             NFSERR_OK, &rep);
3812                         NFSUNLOCKCLSTATE();
3813                 } else {
3814                         NFSUNLOCKCLSTATE();
3815                         m_freem(rep);
3816                 }
3817         }
3818 }
3819
3820 /*
3821  * Generate the next cbident value. Basically just increment a static value
3822  * and then check that it isn't already in the list, if it has wrapped around.
3823  */
3824 static u_int32_t
3825 nfscl_nextcbident(void)
3826 {
3827         struct nfsclclient *clp;
3828         int matched;
3829         static u_int32_t nextcbident = 0;
3830         static int haswrapped = 0;
3831
3832         nextcbident++;
3833         if (nextcbident == 0)
3834                 haswrapped = 1;
3835         if (haswrapped) {
3836                 /*
3837                  * Search the clientid list for one already using this cbident.
3838                  */
3839                 do {
3840                         matched = 0;
3841                         NFSLOCKCLSTATE();
3842                         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3843                                 if (clp->nfsc_cbident == nextcbident) {
3844                                         matched = 1;
3845                                         break;
3846                                 }
3847                         }
3848                         NFSUNLOCKCLSTATE();
3849                         if (matched == 1)
3850                                 nextcbident++;
3851                 } while (matched);
3852         }
3853         return (nextcbident);
3854 }
3855
3856 /*
3857  * Get the mount point related to a given cbident or session and busy it.
3858  */
3859 static mount_t
3860 nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident,
3861     struct nfsclclient **clpp)
3862 {
3863         struct nfsclclient *clp;
3864         mount_t mp;
3865         int error;
3866         struct nfsclsession *tsep;
3867
3868         *clpp = NULL;
3869         NFSLOCKCLSTATE();
3870         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3871                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3872                 if (minorvers == NFSV4_MINORVERSION) {
3873                         if (clp->nfsc_cbident == cbident)
3874                                 break;
3875                 } else if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3876                     NFSX_V4SESSIONID))
3877                         break;
3878         }
3879         if (clp == NULL) {
3880                 NFSUNLOCKCLSTATE();
3881                 return (NULL);
3882         }
3883         mp = clp->nfsc_nmp->nm_mountp;
3884         vfs_ref(mp);
3885         NFSUNLOCKCLSTATE();
3886         error = vfs_busy(mp, 0);
3887         vfs_rel(mp);
3888         if (error != 0)
3889                 return (NULL);
3890         *clpp = clp;
3891         return (mp);
3892 }
3893
3894 /*
3895  * Get the clientid pointer related to a given cbident.
3896  */
3897 static struct nfsclclient *
3898 nfscl_getclnt(u_int32_t cbident)
3899 {
3900         struct nfsclclient *clp;
3901
3902         LIST_FOREACH(clp, &nfsclhead, nfsc_list)
3903                 if (clp->nfsc_cbident == cbident)
3904                         break;
3905         return (clp);
3906 }
3907
3908 /*
3909  * Get the clientid pointer related to a given sessionid.
3910  */
3911 static struct nfsclclient *
3912 nfscl_getclntsess(uint8_t *sessionid)
3913 {
3914         struct nfsclclient *clp;
3915         struct nfsclsession *tsep;
3916
3917         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3918                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3919                 if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3920                     NFSX_V4SESSIONID))
3921                         break;
3922         }
3923         return (clp);
3924 }
3925
3926 /*
3927  * Search for a lock conflict locally on the client. A conflict occurs if
3928  * - not same owner and overlapping byte range and at least one of them is
3929  *   a write lock or this is an unlock.
3930  */
3931 static int
3932 nfscl_localconflict(struct nfsclclient *clp, u_int8_t *fhp, int fhlen,
3933     struct nfscllock *nlop, u_int8_t *own, struct nfscldeleg *dp,
3934     struct nfscllock **lopp)
3935 {
3936         struct nfsclowner *owp;
3937         struct nfsclopen *op;
3938         int ret;
3939
3940         if (dp != NULL) {
3941                 ret = nfscl_checkconflict(&dp->nfsdl_lock, nlop, own, lopp);
3942                 if (ret)
3943                         return (ret);
3944         }
3945         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3946                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3947                         if (op->nfso_fhlen == fhlen &&
3948                             !NFSBCMP(op->nfso_fh, fhp, fhlen)) {
3949                                 ret = nfscl_checkconflict(&op->nfso_lock, nlop,
3950                                     own, lopp);
3951                                 if (ret)
3952                                         return (ret);
3953                         }
3954                 }
3955         }
3956         return (0);
3957 }
3958
3959 static int
3960 nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop,
3961     u_int8_t *own, struct nfscllock **lopp)
3962 {
3963         struct nfscllockowner *lp;
3964         struct nfscllock *lop;
3965
3966         LIST_FOREACH(lp, lhp, nfsl_list) {
3967                 if (NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
3968                         LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3969                                 if (lop->nfslo_first >= nlop->nfslo_end)
3970                                         break;
3971                                 if (lop->nfslo_end <= nlop->nfslo_first)
3972                                         continue;
3973                                 if (lop->nfslo_type == F_WRLCK ||
3974                                     nlop->nfslo_type == F_WRLCK ||
3975                                     nlop->nfslo_type == F_UNLCK) {
3976                                         if (lopp != NULL)
3977                                                 *lopp = lop;
3978                                         return (NFSERR_DENIED);
3979                                 }
3980                         }
3981                 }
3982         }
3983         return (0);
3984 }
3985
3986 /*
3987  * Check for a local conflicting lock.
3988  */
3989 int
3990 nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
3991     u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags)
3992 {
3993         struct nfscllock *lop, nlck;
3994         struct nfscldeleg *dp;
3995         struct nfsnode *np;
3996         u_int8_t own[NFSV4CL_LOCKNAMELEN];
3997         int error;
3998
3999         nlck.nfslo_type = fl->l_type;
4000         nlck.nfslo_first = off;
4001         if (len == NFS64BITSSET) {
4002                 nlck.nfslo_end = NFS64BITSSET;
4003         } else {
4004                 nlck.nfslo_end = off + len;
4005                 if (nlck.nfslo_end <= nlck.nfslo_first)
4006                         return (NFSERR_INVAL);
4007         }
4008         np = VTONFS(vp);
4009         nfscl_filllockowner(id, own, flags);
4010         NFSLOCKCLSTATE();
4011         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4012         error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
4013             &nlck, own, dp, &lop);
4014         if (error != 0) {
4015                 fl->l_whence = SEEK_SET;
4016                 fl->l_start = lop->nfslo_first;
4017                 if (lop->nfslo_end == NFS64BITSSET)
4018                         fl->l_len = 0;
4019                 else
4020                         fl->l_len = lop->nfslo_end - lop->nfslo_first;
4021                 fl->l_pid = (pid_t)0;
4022                 fl->l_type = lop->nfslo_type;
4023                 error = -1;                     /* no RPC required */
4024         } else if (dp != NULL && ((dp->nfsdl_flags & NFSCLDL_WRITE) ||
4025             fl->l_type == F_RDLCK)) {
4026                 /*
4027                  * The delegation ensures that there isn't a conflicting
4028                  * lock on the server, so return -1 to indicate an RPC
4029                  * isn't required.
4030                  */
4031                 fl->l_type = F_UNLCK;
4032                 error = -1;
4033         }
4034         NFSUNLOCKCLSTATE();
4035         return (error);
4036 }
4037
4038 /*
4039  * Handle Recall of a delegation.
4040  * The clp must be exclusive locked when this is called.
4041  */
4042 static int
4043 nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
4044     struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4045     int called_from_renewthread, vnode_t *vpp)
4046 {
4047         struct nfsclowner *owp, *lowp, *nowp;
4048         struct nfsclopen *op, *lop;
4049         struct nfscllockowner *lp;
4050         struct nfscllock *lckp;
4051         struct nfsnode *np;
4052         int error = 0, ret;
4053
4054         if (vp == NULL) {
4055                 KASSERT(vpp != NULL, ("nfscl_recalldeleg: vpp NULL"));
4056                 *vpp = NULL;
4057                 /*
4058                  * First, get a vnode for the file. This is needed to do RPCs.
4059                  */
4060                 ret = nfscl_ngetreopen(nmp->nm_mountp, dp->nfsdl_fh,
4061                     dp->nfsdl_fhlen, p, &np);
4062                 if (ret) {
4063                         /*
4064                          * File isn't open, so nothing to move over to the
4065                          * server.
4066                          */
4067                         return (0);
4068                 }
4069                 vp = NFSTOV(np);
4070                 *vpp = vp;
4071         } else {
4072                 np = VTONFS(vp);
4073         }
4074         dp->nfsdl_flags &= ~NFSCLDL_MODTIMESET;
4075
4076         /*
4077          * Ok, if it's a write delegation, flush data to the server, so
4078          * that close/open consistency is retained.
4079          */
4080         ret = 0;
4081         NFSLOCKNODE(np);
4082         if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) {
4083                 np->n_flag |= NDELEGRECALL;
4084                 NFSUNLOCKNODE(np);
4085                 ret = ncl_flush(vp, MNT_WAIT, p, 1, called_from_renewthread);
4086                 NFSLOCKNODE(np);
4087                 np->n_flag &= ~NDELEGRECALL;
4088         }
4089         NFSINVALATTRCACHE(np);
4090         NFSUNLOCKNODE(np);
4091         if (ret == EIO && called_from_renewthread != 0) {
4092                 /*
4093                  * If the flush failed with EIO for the renew thread,
4094                  * return now, so that the dirty buffer will be flushed
4095                  * later.
4096                  */
4097                 return (ret);
4098         }
4099
4100         /*
4101          * Now, for each openowner with opens issued locally, move them
4102          * over to state against the server.
4103          */
4104         LIST_FOREACH(lowp, &dp->nfsdl_owner, nfsow_list) {
4105                 lop = LIST_FIRST(&lowp->nfsow_open);
4106                 if (lop != NULL) {
4107                         if (LIST_NEXT(lop, nfso_list) != NULL)
4108                                 panic("nfsdlg mult opens");
4109                         /*
4110                          * Look for the same openowner against the server.
4111                          */
4112                         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
4113                                 if (!NFSBCMP(lowp->nfsow_owner,
4114                                     owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
4115                                         newnfs_copycred(&dp->nfsdl_cred, cred);
4116                                         ret = nfscl_moveopen(vp, clp, nmp, lop,
4117                                             owp, dp, cred, p);
4118                                         if (ret == NFSERR_STALECLIENTID ||
4119                                             ret == NFSERR_STALEDONTRECOVER ||
4120                                             ret == NFSERR_BADSESSION)
4121                                                 return (ret);
4122                                         if (ret) {
4123                                                 nfscl_freeopen(lop, 1);
4124                                                 if (!error)
4125                                                         error = ret;
4126                                         }
4127                                         break;
4128                                 }
4129                         }
4130
4131                         /*
4132                          * If no openowner found, create one and get an open
4133                          * for it.
4134                          */
4135                         if (owp == NULL) {
4136                                 nowp = malloc(
4137                                     sizeof (struct nfsclowner), M_NFSCLOWNER,
4138                                     M_WAITOK);
4139                                 nfscl_newopen(clp, NULL, &owp, &nowp, &op, 
4140                                     NULL, lowp->nfsow_owner, dp->nfsdl_fh,
4141                                     dp->nfsdl_fhlen, NULL, NULL);
4142                                 newnfs_copycred(&dp->nfsdl_cred, cred);
4143                                 ret = nfscl_moveopen(vp, clp, nmp, lop,
4144                                     owp, dp, cred, p);
4145                                 if (ret) {
4146                                         nfscl_freeopenowner(owp, 0);
4147                                         if (ret == NFSERR_STALECLIENTID ||
4148                                             ret == NFSERR_STALEDONTRECOVER ||
4149                                             ret == NFSERR_BADSESSION)
4150                                                 return (ret);
4151                                         if (ret) {
4152                                                 nfscl_freeopen(lop, 1);
4153                                                 if (!error)
4154                                                         error = ret;
4155                                         }
4156                                 }
4157                         }
4158                 }
4159         }
4160
4161         /*
4162          * Now, get byte range locks for any locks done locally.
4163          */
4164         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4165                 LIST_FOREACH(lckp, &lp->nfsl_lock, nfslo_list) {
4166                         newnfs_copycred(&dp->nfsdl_cred, cred);
4167                         ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p);
4168                         if (ret == NFSERR_STALESTATEID ||
4169                             ret == NFSERR_STALEDONTRECOVER ||
4170                             ret == NFSERR_STALECLIENTID ||
4171                             ret == NFSERR_BADSESSION)
4172                                 return (ret);
4173                         if (ret && !error)
4174                                 error = ret;
4175                 }
4176         }
4177         return (error);
4178 }
4179
4180 /*
4181  * Move a locally issued open over to an owner on the state list.
4182  * SIDE EFFECT: If it needs to sleep (do an rpc), it unlocks clstate and
4183  * returns with it unlocked.
4184  */
4185 static int
4186 nfscl_moveopen(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4187     struct nfsclopen *lop, struct nfsclowner *owp, struct nfscldeleg *dp,
4188     struct ucred *cred, NFSPROC_T *p)
4189 {
4190         struct nfsclopen *op, *nop;
4191         struct nfscldeleg *ndp;
4192         struct nfsnode *np;
4193         int error = 0, newone;
4194
4195         /*
4196          * First, look for an appropriate open, If found, just increment the
4197          * opencnt in it.
4198          */
4199         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
4200                 if ((op->nfso_mode & lop->nfso_mode) == lop->nfso_mode &&
4201                     op->nfso_fhlen == lop->nfso_fhlen &&
4202                     !NFSBCMP(op->nfso_fh, lop->nfso_fh, op->nfso_fhlen)) {
4203                         op->nfso_opencnt += lop->nfso_opencnt;
4204                         nfscl_freeopen(lop, 1);
4205                         return (0);
4206                 }
4207         }
4208
4209         /* No appropriate open, so we have to do one against the server. */
4210         np = VTONFS(vp);
4211         nop = malloc(sizeof (struct nfsclopen) +
4212             lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
4213         nop->nfso_hash.le_prev = NULL;
4214         newone = 0;
4215         nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner,
4216             lop->nfso_fh, lop->nfso_fhlen, cred, &newone);
4217         ndp = dp;
4218         error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen,
4219             lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op,
4220             NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &ndp, 0, 0, cred, p);
4221         if (error) {
4222                 if (newone)
4223                         nfscl_freeopen(op, 0);
4224         } else {
4225                 op->nfso_mode |= lop->nfso_mode;
4226                 op->nfso_opencnt += lop->nfso_opencnt;
4227                 nfscl_freeopen(lop, 1);
4228         }
4229         if (nop != NULL)
4230                 free(nop, M_NFSCLOPEN);
4231         if (ndp != NULL) {
4232                 /*
4233                  * What should I do with the returned delegation, since the
4234                  * delegation is being recalled? For now, just printf and
4235                  * through it away.
4236                  */
4237                 printf("Moveopen returned deleg\n");
4238                 free(ndp, M_NFSCLDELEG);
4239         }
4240         return (error);
4241 }
4242
4243 /*
4244  * Recall all delegations on this client.
4245  */
4246 static void
4247 nfscl_totalrecall(struct nfsclclient *clp)
4248 {
4249         struct nfscldeleg *dp;
4250
4251         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
4252                 if ((dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0)
4253                         dp->nfsdl_flags |= NFSCLDL_RECALL;
4254         }
4255 }
4256
4257 /*
4258  * Relock byte ranges. Called for delegation recall and state expiry.
4259  */
4260 static int
4261 nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4262     struct nfscllockowner *lp, struct nfscllock *lop, struct ucred *cred,
4263     NFSPROC_T *p)
4264 {
4265         struct nfscllockowner *nlp;
4266         struct nfsfh *nfhp;
4267         u_int64_t off, len;
4268         int error, newone, donelocally;
4269
4270         off = lop->nfslo_first;
4271         len = lop->nfslo_end - lop->nfslo_first;
4272         error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p,
4273             clp, 1, NULL, lp->nfsl_lockflags, lp->nfsl_owner,
4274             lp->nfsl_openowner, &nlp, &newone, &donelocally);
4275         if (error || donelocally)
4276                 return (error);
4277         nfhp = VTONFS(vp)->n_fhp;
4278         error = nfscl_trylock(nmp, vp, nfhp->nfh_fh,
4279             nfhp->nfh_len, nlp, newone, 0, off,
4280             len, lop->nfslo_type, cred, p);
4281         if (error)
4282                 nfscl_freelockowner(nlp, 0);
4283         return (error);
4284 }
4285
4286 /*
4287  * Called to re-open a file. Basically get a vnode for the file handle
4288  * and then call nfsrpc_openrpc() to do the rest.
4289  */
4290 static int
4291 nfsrpc_reopen(struct nfsmount *nmp, u_int8_t *fhp, int fhlen,
4292     u_int32_t mode, struct nfsclopen *op, struct nfscldeleg **dpp,
4293     struct ucred *cred, NFSPROC_T *p)
4294 {
4295         struct nfsnode *np;
4296         vnode_t vp;
4297         int error;
4298
4299         error = nfscl_ngetreopen(nmp->nm_mountp, fhp, fhlen, p, &np);
4300         if (error)
4301                 return (error);
4302         vp = NFSTOV(np);
4303         if (np->n_v4 != NULL) {
4304                 error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data,
4305                     np->n_v4->n4_fhlen, fhp, fhlen, mode, op,
4306                     NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, dpp, 0, 0,
4307                     cred, p);
4308         } else {
4309                 error = EINVAL;
4310         }
4311         vrele(vp);
4312         return (error);
4313 }
4314
4315 /*
4316  * Try an open against the server. Just call nfsrpc_openrpc(), retrying while
4317  * NFSERR_DELAY. Also, try system credentials, if the passed in credentials
4318  * fail.
4319  */
4320 static int
4321 nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
4322     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
4323     u_int8_t *name, int namelen, struct nfscldeleg **ndpp,
4324     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p)
4325 {
4326         int error;
4327
4328         do {
4329                 error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen,
4330                     mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p,
4331                     0, 0);
4332                 if (error == NFSERR_DELAY)
4333                         (void) nfs_catnap(PZERO, error, "nfstryop");
4334         } while (error == NFSERR_DELAY);
4335         if (error == EAUTH || error == EACCES) {
4336                 /* Try again using system credentials */
4337                 newnfs_setroot(cred);
4338                 do {
4339                     error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp,
4340                         newfhlen, mode, op, name, namelen, ndpp, reclaim,
4341                         delegtype, cred, p, 1, 0);
4342                     if (error == NFSERR_DELAY)
4343                         (void) nfs_catnap(PZERO, error, "nfstryop");
4344                 } while (error == NFSERR_DELAY);
4345         }
4346         return (error);
4347 }
4348
4349 /*
4350  * Try a byte range lock. Just loop on nfsrpc_lock() while it returns
4351  * NFSERR_DELAY. Also, retry with system credentials, if the provided
4352  * cred don't work.
4353  */
4354 static int
4355 nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp,
4356     int fhlen, struct nfscllockowner *nlp, int newone, int reclaim,
4357     u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p)
4358 {
4359         struct nfsrv_descript nfsd, *nd = &nfsd;
4360         int error;
4361
4362         do {
4363                 error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone,
4364                     reclaim, off, len, type, cred, p, 0);
4365                 if (!error && nd->nd_repstat == NFSERR_DELAY)
4366                         (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4367                             "nfstrylck");
4368         } while (!error && nd->nd_repstat == NFSERR_DELAY);
4369         if (!error)
4370                 error = nd->nd_repstat;
4371         if (error == EAUTH || error == EACCES) {
4372                 /* Try again using root credentials */
4373                 newnfs_setroot(cred);
4374                 do {
4375                         error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp,
4376                             newone, reclaim, off, len, type, cred, p, 1);
4377                         if (!error && nd->nd_repstat == NFSERR_DELAY)
4378                                 (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4379                                     "nfstrylck");
4380                 } while (!error && nd->nd_repstat == NFSERR_DELAY);
4381                 if (!error)
4382                         error = nd->nd_repstat;
4383         }
4384         return (error);
4385 }
4386
4387 /*
4388  * Try a delegreturn against the server. Just call nfsrpc_delegreturn(),
4389  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4390  * credentials fail.
4391  */
4392 static int
4393 nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred,
4394     struct nfsmount *nmp, NFSPROC_T *p)
4395 {
4396         int error;
4397
4398         do {
4399                 error = nfsrpc_delegreturn(dp, cred, nmp, p, 0);
4400                 if (error == NFSERR_DELAY)
4401                         (void) nfs_catnap(PZERO, error, "nfstrydp");
4402         } while (error == NFSERR_DELAY);
4403         if (error == EAUTH || error == EACCES) {
4404                 /* Try again using system credentials */
4405                 newnfs_setroot(cred);
4406                 do {
4407                         error = nfsrpc_delegreturn(dp, cred, nmp, p, 1);
4408                         if (error == NFSERR_DELAY)
4409                                 (void) nfs_catnap(PZERO, error, "nfstrydp");
4410                 } while (error == NFSERR_DELAY);
4411         }
4412         return (error);
4413 }
4414
4415 /*
4416  * Try a close against the server. Just call nfsrpc_closerpc(),
4417  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4418  * credentials fail.
4419  */
4420 int
4421 nfscl_tryclose(struct nfsclopen *op, struct ucred *cred,
4422     struct nfsmount *nmp, NFSPROC_T *p)
4423 {
4424         struct nfsrv_descript nfsd, *nd = &nfsd;
4425         int error;
4426
4427         do {
4428                 error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0);
4429                 if (error == NFSERR_DELAY)
4430                         (void) nfs_catnap(PZERO, error, "nfstrycl");
4431         } while (error == NFSERR_DELAY);
4432         if (error == EAUTH || error == EACCES) {
4433                 /* Try again using system credentials */
4434                 newnfs_setroot(cred);
4435                 do {
4436                         error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1);
4437                         if (error == NFSERR_DELAY)
4438                                 (void) nfs_catnap(PZERO, error, "nfstrycl");
4439                 } while (error == NFSERR_DELAY);
4440         }
4441         return (error);
4442 }
4443
4444 /*
4445  * Decide if a delegation on a file permits close without flushing writes
4446  * to the server. This might be a big performance win in some environments.
4447  * (Not useful until the client does caching on local stable storage.)
4448  */
4449 int
4450 nfscl_mustflush(vnode_t vp)
4451 {
4452         struct nfsclclient *clp;
4453         struct nfscldeleg *dp;
4454         struct nfsnode *np;
4455         struct nfsmount *nmp;
4456
4457         np = VTONFS(vp);
4458         nmp = VFSTONFS(vp->v_mount);
4459         if (!NFSHASNFSV4(nmp))
4460                 return (1);
4461         NFSLOCKCLSTATE();
4462         clp = nfscl_findcl(nmp);
4463         if (clp == NULL) {
4464                 NFSUNLOCKCLSTATE();
4465                 return (1);
4466         }
4467         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4468         if (dp != NULL && (dp->nfsdl_flags &
4469             (NFSCLDL_WRITE | NFSCLDL_RECALL | NFSCLDL_DELEGRET)) ==
4470              NFSCLDL_WRITE &&
4471             (dp->nfsdl_sizelimit >= np->n_size ||
4472              !NFSHASSTRICT3530(nmp))) {
4473                 NFSUNLOCKCLSTATE();
4474                 return (0);
4475         }
4476         NFSUNLOCKCLSTATE();
4477         return (1);
4478 }
4479
4480 /*
4481  * See if a (write) delegation exists for this file.
4482  */
4483 int
4484 nfscl_nodeleg(vnode_t vp, int writedeleg)
4485 {
4486         struct nfsclclient *clp;
4487         struct nfscldeleg *dp;
4488         struct nfsnode *np;
4489         struct nfsmount *nmp;
4490
4491         np = VTONFS(vp);
4492         nmp = VFSTONFS(vp->v_mount);
4493         if (!NFSHASNFSV4(nmp))
4494                 return (1);
4495         NFSLOCKCLSTATE();
4496         clp = nfscl_findcl(nmp);
4497         if (clp == NULL) {
4498                 NFSUNLOCKCLSTATE();
4499                 return (1);
4500         }
4501         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4502         if (dp != NULL &&
4503             (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0 &&
4504             (writedeleg == 0 || (dp->nfsdl_flags & NFSCLDL_WRITE) ==
4505              NFSCLDL_WRITE)) {
4506                 NFSUNLOCKCLSTATE();
4507                 return (0);
4508         }
4509         NFSUNLOCKCLSTATE();
4510         return (1);
4511 }
4512
4513 /*
4514  * Look for an associated delegation that should be DelegReturned.
4515  */
4516 int
4517 nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp)
4518 {
4519         struct nfsclclient *clp;
4520         struct nfscldeleg *dp;
4521         struct nfsclowner *owp;
4522         struct nfscllockowner *lp;
4523         struct nfsmount *nmp;
4524         struct ucred *cred;
4525         struct nfsnode *np;
4526         int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4527
4528         nmp = VFSTONFS(vp->v_mount);
4529         np = VTONFS(vp);
4530         NFSLOCKCLSTATE();
4531         /*
4532          * Loop around waiting for:
4533          * - outstanding I/O operations on delegations to complete
4534          * - for a delegation on vp that has state, lock the client and
4535          *   do a recall
4536          * - return delegation with no state
4537          */
4538         while (1) {
4539                 clp = nfscl_findcl(nmp);
4540                 if (clp == NULL) {
4541                         NFSUNLOCKCLSTATE();
4542                         return (retcnt);
4543                 }
4544                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4545                     np->n_fhp->nfh_len);
4546                 if (dp != NULL) {
4547                     /*
4548                      * Wait for outstanding I/O ops to be done.
4549                      */
4550                     if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4551                         if (igotlock) {
4552                             nfsv4_unlock(&clp->nfsc_lock, 0);
4553                             igotlock = 0;
4554                         }
4555                         dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4556                         (void) nfsmsleep(&dp->nfsdl_rwlock,
4557                             NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4558                         continue;
4559                     }
4560                     needsrecall = 0;
4561                     LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4562                         if (!LIST_EMPTY(&owp->nfsow_open)) {
4563                             needsrecall = 1;
4564                             break;
4565                         }
4566                     }
4567                     if (!needsrecall) {
4568                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4569                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4570                                 needsrecall = 1;
4571                                 break;
4572                             }
4573                         }
4574                     }
4575                     if (needsrecall && !triedrecall) {
4576                         dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4577                         islept = 0;
4578                         while (!igotlock) {
4579                             igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4580                                 &islept, NFSCLSTATEMUTEXPTR, NULL);
4581                             if (islept)
4582                                 break;
4583                         }
4584                         if (islept)
4585                             continue;
4586                         NFSUNLOCKCLSTATE();
4587                         cred = newnfs_getcred();
4588                         newnfs_copycred(&dp->nfsdl_cred, cred);
4589                         nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0, NULL);
4590                         NFSFREECRED(cred);
4591                         triedrecall = 1;
4592                         NFSLOCKCLSTATE();
4593                         nfsv4_unlock(&clp->nfsc_lock, 0);
4594                         igotlock = 0;
4595                         continue;
4596                     }
4597                     *stp = dp->nfsdl_stateid;
4598                     retcnt = 1;
4599                     nfscl_cleandeleg(dp);
4600                     nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4601                 }
4602                 if (igotlock)
4603                     nfsv4_unlock(&clp->nfsc_lock, 0);
4604                 NFSUNLOCKCLSTATE();
4605                 return (retcnt);
4606         }
4607 }
4608
4609 /*
4610  * Look for associated delegation(s) that should be DelegReturned.
4611  */
4612 int
4613 nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp,
4614     nfsv4stateid_t *tstp, int *gottdp, NFSPROC_T *p)
4615 {
4616         struct nfsclclient *clp;
4617         struct nfscldeleg *dp;
4618         struct nfsclowner *owp;
4619         struct nfscllockowner *lp;
4620         struct nfsmount *nmp;
4621         struct ucred *cred;
4622         struct nfsnode *np;
4623         int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4624
4625         nmp = VFSTONFS(fvp->v_mount);
4626         *gotfdp = 0;
4627         *gottdp = 0;
4628         NFSLOCKCLSTATE();
4629         /*
4630          * Loop around waiting for:
4631          * - outstanding I/O operations on delegations to complete
4632          * - for a delegation on fvp that has state, lock the client and
4633          *   do a recall
4634          * - return delegation(s) with no state.
4635          */
4636         while (1) {
4637                 clp = nfscl_findcl(nmp);
4638                 if (clp == NULL) {
4639                         NFSUNLOCKCLSTATE();
4640                         return (retcnt);
4641                 }
4642                 np = VTONFS(fvp);
4643                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4644                     np->n_fhp->nfh_len);
4645                 if (dp != NULL && *gotfdp == 0) {
4646                     /*
4647                      * Wait for outstanding I/O ops to be done.
4648                      */
4649                     if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4650                         if (igotlock) {
4651                             nfsv4_unlock(&clp->nfsc_lock, 0);
4652                             igotlock = 0;
4653                         }
4654                         dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4655                         (void) nfsmsleep(&dp->nfsdl_rwlock,
4656                             NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4657                         continue;
4658                     }
4659                     needsrecall = 0;
4660                     LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4661                         if (!LIST_EMPTY(&owp->nfsow_open)) {
4662                             needsrecall = 1;
4663                             break;
4664                         }
4665                     }
4666                     if (!needsrecall) {
4667                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4668                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4669                                 needsrecall = 1;
4670                                 break;
4671                             }
4672                         }
4673                     }
4674                     if (needsrecall && !triedrecall) {
4675                         dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4676                         islept = 0;
4677                         while (!igotlock) {
4678                             igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4679                                 &islept, NFSCLSTATEMUTEXPTR, NULL);
4680                             if (islept)
4681                                 break;
4682                         }
4683                         if (islept)
4684                             continue;
4685                         NFSUNLOCKCLSTATE();
4686                         cred = newnfs_getcred();
4687                         newnfs_copycred(&dp->nfsdl_cred, cred);
4688                         nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0, NULL);
4689                         NFSFREECRED(cred);
4690                         triedrecall = 1;
4691                         NFSLOCKCLSTATE();
4692                         nfsv4_unlock(&clp->nfsc_lock, 0);
4693                         igotlock = 0;
4694                         continue;
4695                     }
4696                     *fstp = dp->nfsdl_stateid;
4697                     retcnt++;
4698                     *gotfdp = 1;
4699                     nfscl_cleandeleg(dp);
4700                     nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4701                 }
4702                 if (igotlock) {
4703                     nfsv4_unlock(&clp->nfsc_lock, 0);
4704                     igotlock = 0;
4705                 }
4706                 if (tvp != NULL) {
4707                     np = VTONFS(tvp);
4708                     dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4709                         np->n_fhp->nfh_len);
4710                     if (dp != NULL && *gottdp == 0) {
4711                         /*
4712                          * Wait for outstanding I/O ops to be done.
4713                          */
4714                         if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4715                             dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4716                             (void) nfsmsleep(&dp->nfsdl_rwlock,
4717                                 NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4718                             continue;
4719                         }
4720                         LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4721                             if (!LIST_EMPTY(&owp->nfsow_open)) {
4722                                 NFSUNLOCKCLSTATE();
4723                                 return (retcnt);
4724                             }
4725                         }
4726                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4727                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4728                                 NFSUNLOCKCLSTATE();
4729                                 return (retcnt);
4730                             }
4731                         }
4732                         *tstp = dp->nfsdl_stateid;
4733                         retcnt++;
4734                         *gottdp = 1;
4735                         nfscl_cleandeleg(dp);
4736                         nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4737                     }
4738                 }
4739                 NFSUNLOCKCLSTATE();
4740                 return (retcnt);
4741         }
4742 }
4743
4744 /*
4745  * Get a reference on the clientid associated with the mount point.
4746  * Return 1 if success, 0 otherwise.
4747  */
4748 int
4749 nfscl_getref(struct nfsmount *nmp)
4750 {
4751         struct nfsclclient *clp;
4752
4753         NFSLOCKCLSTATE();
4754         clp = nfscl_findcl(nmp);
4755         if (clp == NULL) {
4756                 NFSUNLOCKCLSTATE();
4757                 return (0);
4758         }
4759         nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, NULL);
4760         NFSUNLOCKCLSTATE();
4761         return (1);
4762 }
4763
4764 /*
4765  * Release a reference on a clientid acquired with the above call.
4766  */
4767 void
4768 nfscl_relref(struct nfsmount *nmp)
4769 {
4770         struct nfsclclient *clp;
4771
4772         NFSLOCKCLSTATE();
4773         clp = nfscl_findcl(nmp);
4774         if (clp == NULL) {
4775                 NFSUNLOCKCLSTATE();
4776                 return;
4777         }
4778         nfsv4_relref(&clp->nfsc_lock);
4779         NFSUNLOCKCLSTATE();
4780 }
4781
4782 /*
4783  * Save the size attribute in the delegation, since the nfsnode
4784  * is going away.
4785  */
4786 void
4787 nfscl_reclaimnode(vnode_t vp)
4788 {
4789         struct nfsclclient *clp;
4790         struct nfscldeleg *dp;
4791         struct nfsnode *np = VTONFS(vp);
4792         struct nfsmount *nmp;
4793
4794         nmp = VFSTONFS(vp->v_mount);
4795         if (!NFSHASNFSV4(nmp))
4796                 return;
4797         NFSLOCKCLSTATE();
4798         clp = nfscl_findcl(nmp);
4799         if (clp == NULL) {
4800                 NFSUNLOCKCLSTATE();
4801                 return;
4802         }
4803         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4804         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4805                 dp->nfsdl_size = np->n_size;
4806         NFSUNLOCKCLSTATE();
4807 }
4808
4809 /*
4810  * Get the saved size attribute in the delegation, since it is a
4811  * newly allocated nfsnode.
4812  */
4813 void
4814 nfscl_newnode(vnode_t vp)
4815 {
4816         struct nfsclclient *clp;
4817         struct nfscldeleg *dp;
4818         struct nfsnode *np = VTONFS(vp);
4819         struct nfsmount *nmp;
4820
4821         nmp = VFSTONFS(vp->v_mount);
4822         if (!NFSHASNFSV4(nmp))
4823                 return;
4824         NFSLOCKCLSTATE();
4825         clp = nfscl_findcl(nmp);
4826         if (clp == NULL) {
4827                 NFSUNLOCKCLSTATE();
4828                 return;
4829         }
4830         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4831         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4832                 np->n_size = dp->nfsdl_size;
4833         NFSUNLOCKCLSTATE();
4834 }
4835
4836 /*
4837  * If there is a valid write delegation for this file, set the modtime
4838  * to the local clock time.
4839  */
4840 void
4841 nfscl_delegmodtime(vnode_t vp)
4842 {
4843         struct nfsclclient *clp;
4844         struct nfscldeleg *dp;
4845         struct nfsnode *np = VTONFS(vp);
4846         struct nfsmount *nmp;
4847
4848         nmp = VFSTONFS(vp->v_mount);
4849         if (!NFSHASNFSV4(nmp))
4850                 return;
4851         NFSLOCKCLSTATE();
4852         clp = nfscl_findcl(nmp);
4853         if (clp == NULL) {
4854                 NFSUNLOCKCLSTATE();
4855                 return;
4856         }
4857         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4858         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) {
4859                 nanotime(&dp->nfsdl_modtime);
4860                 dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
4861         }
4862         NFSUNLOCKCLSTATE();
4863 }
4864
4865 /*
4866  * If there is a valid write delegation for this file with a modtime set,
4867  * put that modtime in mtime.
4868  */
4869 void
4870 nfscl_deleggetmodtime(vnode_t vp, struct timespec *mtime)
4871 {
4872         struct nfsclclient *clp;
4873         struct nfscldeleg *dp;
4874         struct nfsnode *np = VTONFS(vp);
4875         struct nfsmount *nmp;
4876
4877         nmp = VFSTONFS(vp->v_mount);
4878         if (!NFSHASNFSV4(nmp))
4879                 return;
4880         NFSLOCKCLSTATE();
4881         clp = nfscl_findcl(nmp);
4882         if (clp == NULL) {
4883                 NFSUNLOCKCLSTATE();
4884                 return;
4885         }
4886         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4887         if (dp != NULL &&
4888             (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) ==
4889             (NFSCLDL_WRITE | NFSCLDL_MODTIMESET))
4890                 *mtime = dp->nfsdl_modtime;
4891         NFSUNLOCKCLSTATE();
4892 }
4893
4894 static int
4895 nfscl_errmap(struct nfsrv_descript *nd, u_int32_t minorvers)
4896 {
4897         short *defaulterrp, *errp;
4898
4899         if (!nd->nd_repstat)
4900                 return (0);
4901         if (nd->nd_procnum == NFSPROC_NOOP)
4902                 return (txdr_unsigned(nd->nd_repstat & 0xffff));
4903         if (nd->nd_repstat == EBADRPC)
4904                 return (txdr_unsigned(NFSERR_BADXDR));
4905         if (nd->nd_repstat == NFSERR_MINORVERMISMATCH ||
4906             nd->nd_repstat == NFSERR_OPILLEGAL)
4907                 return (txdr_unsigned(nd->nd_repstat));
4908         if (nd->nd_repstat >= NFSERR_BADIOMODE && nd->nd_repstat < 20000 &&
4909             minorvers > NFSV4_MINORVERSION) {
4910                 /* NFSv4.n error. */
4911                 return (txdr_unsigned(nd->nd_repstat));
4912         }
4913         if (nd->nd_procnum < NFSV4OP_CBNOPS)
4914                 errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum];
4915         else
4916                 return (txdr_unsigned(nd->nd_repstat));
4917         while (*++errp)
4918                 if (*errp == (short)nd->nd_repstat)
4919                         return (txdr_unsigned(nd->nd_repstat));
4920         return (txdr_unsigned(*defaulterrp));
4921 }
4922
4923 /*
4924  * Called to find/add a layout to a client.
4925  * This function returns the layout with a refcnt (shared lock) upon
4926  * success (returns 0) or with no lock/refcnt on the layout when an
4927  * error is returned.
4928  * If a layout is passed in via lypp, it is locked (exclusively locked).
4929  */
4930 int
4931 nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
4932     nfsv4stateid_t *stateidp, int layouttype, int retonclose,
4933     struct nfsclflayouthead *fhlp, struct nfscllayout **lypp,
4934     struct ucred *cred, NFSPROC_T *p)
4935 {
4936         struct nfsclclient *clp;
4937         struct nfscllayout *lyp, *tlyp;
4938         struct nfsclflayout *flp;
4939         struct nfsnode *np = VTONFS(vp);
4940         mount_t mp;
4941         int layout_passed_in;
4942
4943         mp = nmp->nm_mountp;
4944         layout_passed_in = 1;
4945         tlyp = NULL;
4946         lyp = *lypp;
4947         if (lyp == NULL) {
4948                 layout_passed_in = 0;
4949                 tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT,
4950                     M_WAITOK | M_ZERO);
4951         }
4952
4953         NFSLOCKCLSTATE();
4954         clp = nmp->nm_clp;
4955         if (clp == NULL) {
4956                 if (layout_passed_in != 0)
4957                         nfsv4_unlock(&lyp->nfsly_lock, 0);
4958                 NFSUNLOCKCLSTATE();
4959                 if (tlyp != NULL)
4960                         free(tlyp, M_NFSLAYOUT);
4961                 return (EPERM);
4962         }
4963         if (lyp == NULL) {
4964                 /*
4965                  * Although no lyp was passed in, another thread might have
4966                  * allocated one. If one is found, just increment it's ref
4967                  * count and return it.
4968                  */
4969                 lyp = nfscl_findlayout(clp, fhp, fhlen);
4970                 if (lyp == NULL) {
4971                         lyp = tlyp;
4972                         tlyp = NULL;
4973                         lyp->nfsly_stateid.seqid = stateidp->seqid;
4974                         lyp->nfsly_stateid.other[0] = stateidp->other[0];
4975                         lyp->nfsly_stateid.other[1] = stateidp->other[1];
4976                         lyp->nfsly_stateid.other[2] = stateidp->other[2];
4977                         lyp->nfsly_lastbyte = 0;
4978                         LIST_INIT(&lyp->nfsly_flayread);
4979                         LIST_INIT(&lyp->nfsly_flayrw);
4980                         LIST_INIT(&lyp->nfsly_recall);
4981                         lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0];
4982                         lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1];
4983                         lyp->nfsly_clp = clp;
4984                         if (layouttype == NFSLAYOUT_FLEXFILE)
4985                                 lyp->nfsly_flags = NFSLY_FLEXFILE;
4986                         else
4987                                 lyp->nfsly_flags = NFSLY_FILES;
4988                         if (retonclose != 0)
4989                                 lyp->nfsly_flags |= NFSLY_RETONCLOSE;
4990                         lyp->nfsly_fhlen = fhlen;
4991                         NFSBCOPY(fhp, lyp->nfsly_fh, fhlen);
4992                         TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
4993                         LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp,
4994                             nfsly_hash);
4995                         lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
4996                         nfscl_layoutcnt++;
4997                 } else {
4998                         if (retonclose != 0)
4999                                 lyp->nfsly_flags |= NFSLY_RETONCLOSE;
5000                         if (stateidp->seqid > lyp->nfsly_stateid.seqid)
5001                                 lyp->nfsly_stateid.seqid = stateidp->seqid;
5002                         TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
5003                         TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
5004                         lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
5005                 }
5006                 nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
5007                 if (NFSCL_FORCEDISM(mp)) {
5008                         NFSUNLOCKCLSTATE();
5009                         if (tlyp != NULL)
5010                                 free(tlyp, M_NFSLAYOUT);
5011                         return (EPERM);
5012                 }
5013                 *lypp = lyp;
5014         } else if (stateidp->seqid > lyp->nfsly_stateid.seqid)
5015                 lyp->nfsly_stateid.seqid = stateidp->seqid;
5016
5017         /* Merge the new list of File Layouts into the list. */
5018         flp = LIST_FIRST(fhlp);
5019         if (flp != NULL) {
5020                 if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ)
5021                         nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp);
5022                 else
5023                         nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp);
5024         }
5025         if (layout_passed_in != 0)
5026                 nfsv4_unlock(&lyp->nfsly_lock, 1);
5027         NFSUNLOCKCLSTATE();
5028         if (tlyp != NULL)
5029                 free(tlyp, M_NFSLAYOUT);
5030         return (0);
5031 }
5032
5033 /*
5034  * Search for a layout by MDS file handle.
5035  * If one is found, it is returned with a refcnt (shared lock) iff
5036  * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is
5037  * returned NULL.
5038  */
5039 struct nfscllayout *
5040 nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen,
5041     uint64_t off, struct nfsclflayout **retflpp, int *recalledp)
5042 {
5043         struct nfscllayout *lyp;
5044         mount_t mp;
5045         int error, igotlock;
5046
5047         mp = clp->nfsc_nmp->nm_mountp;
5048         *recalledp = 0;
5049         *retflpp = NULL;
5050         NFSLOCKCLSTATE();
5051         lyp = nfscl_findlayout(clp, fhp, fhlen);
5052         if (lyp != NULL) {
5053                 if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5054                         TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
5055                         TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
5056                         lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
5057                         error = nfscl_findlayoutforio(lyp, off,
5058                             NFSV4OPEN_ACCESSREAD, retflpp);
5059                         if (error == 0)
5060                                 nfsv4_getref(&lyp->nfsly_lock, NULL,
5061                                     NFSCLSTATEMUTEXPTR, mp);
5062                         else {
5063                                 do {
5064                                         igotlock = nfsv4_lock(&lyp->nfsly_lock,
5065                                             1, NULL, NFSCLSTATEMUTEXPTR, mp);
5066                                 } while (igotlock == 0 && !NFSCL_FORCEDISM(mp));
5067                                 *retflpp = NULL;
5068                         }
5069                         if (NFSCL_FORCEDISM(mp)) {
5070                                 lyp = NULL;
5071                                 *recalledp = 1;
5072                         }
5073                 } else {
5074                         lyp = NULL;
5075                         *recalledp = 1;
5076                 }
5077         }
5078         NFSUNLOCKCLSTATE();
5079         return (lyp);
5080 }
5081
5082 /*
5083  * Search for a layout by MDS file handle. If one is found, mark in to be
5084  * recalled, if it already marked "return on close".
5085  */
5086 static void
5087 nfscl_retoncloselayout(vnode_t vp, struct nfsclclient *clp, uint8_t *fhp,
5088     int fhlen, struct nfsclrecalllayout **recallpp)
5089 {
5090         struct nfscllayout *lyp;
5091         uint32_t iomode;
5092
5093         if (vp->v_type != VREG || !NFSHASPNFS(VFSTONFS(vp->v_mount)) ||
5094             nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5095             (VTONFS(vp)->n_flag & NNOLAYOUT) != 0)
5096                 return;
5097         lyp = nfscl_findlayout(clp, fhp, fhlen);
5098         if (lyp != NULL && (lyp->nfsly_flags & (NFSLY_RETONCLOSE |
5099             NFSLY_RECALL)) == NFSLY_RETONCLOSE) {
5100                 iomode = 0;
5101                 if (!LIST_EMPTY(&lyp->nfsly_flayread))
5102                         iomode |= NFSLAYOUTIOMODE_READ;
5103                 if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5104                         iomode |= NFSLAYOUTIOMODE_RW;
5105                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5106                     0, UINT64_MAX, lyp->nfsly_stateid.seqid, 0, 0, NULL,
5107                     *recallpp);
5108                 NFSCL_DEBUG(4, "retoncls recall iomode=%d\n", iomode);
5109                 *recallpp = NULL;
5110         }
5111 }
5112
5113 /*
5114  * Mark the layout to be recalled and with an error.
5115  * Also, disable the dsp from further use.
5116  */
5117 void
5118 nfscl_dserr(uint32_t op, uint32_t stat, struct nfscldevinfo *dp,
5119     struct nfscllayout *lyp, struct nfsclds *dsp)
5120 {
5121         struct nfsclrecalllayout *recallp;
5122         uint32_t iomode;
5123
5124         printf("DS being disabled, error=%d\n", stat);
5125         /* Set up the return of the layout. */
5126         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
5127         iomode = 0;
5128         NFSLOCKCLSTATE();
5129         if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5130                 if (!LIST_EMPTY(&lyp->nfsly_flayread))
5131                         iomode |= NFSLAYOUTIOMODE_READ;
5132                 if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5133                         iomode |= NFSLAYOUTIOMODE_RW;
5134                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5135                     0, UINT64_MAX, lyp->nfsly_stateid.seqid, stat, op,
5136                     dp->nfsdi_deviceid, recallp);
5137                 NFSUNLOCKCLSTATE();
5138                 NFSCL_DEBUG(4, "nfscl_dserr recall iomode=%d\n", iomode);
5139         } else {
5140                 NFSUNLOCKCLSTATE();
5141                 free(recallp, M_NFSLAYRECALL);
5142         }
5143
5144         /* And shut the TCP connection down. */
5145         nfscl_cancelreqs(dsp);
5146 }
5147
5148 /*
5149  * Cancel all RPCs for this "dsp" by closing the connection.
5150  * Also, mark the session as defunct.
5151  * If NFSCLDS_SAMECONN is set, the connection is shared with other DSs and
5152  * cannot be shut down.
5153  */
5154 void
5155 nfscl_cancelreqs(struct nfsclds *dsp)
5156 {
5157         struct __rpc_client *cl;
5158         static int non_event;
5159
5160         NFSLOCKDS(dsp);
5161         if ((dsp->nfsclds_flags & (NFSCLDS_CLOSED | NFSCLDS_SAMECONN)) == 0 &&
5162             dsp->nfsclds_sockp != NULL &&
5163             dsp->nfsclds_sockp->nr_client != NULL) {
5164                 dsp->nfsclds_flags |= NFSCLDS_CLOSED;
5165                 cl = dsp->nfsclds_sockp->nr_client;
5166                 dsp->nfsclds_sess.nfsess_defunct = 1;
5167                 NFSUNLOCKDS(dsp);
5168                 CLNT_CLOSE(cl);
5169                 /*
5170                  * This 1sec sleep is done to reduce the number of reconnect
5171                  * attempts made on the DS while it has failed.
5172                  */
5173                 tsleep(&non_event, PVFS, "ndscls", hz);
5174                 return;
5175         }
5176         NFSUNLOCKDS(dsp);
5177 }
5178
5179 /*
5180  * Dereference a layout.
5181  */
5182 void
5183 nfscl_rellayout(struct nfscllayout *lyp, int exclocked)
5184 {
5185
5186         NFSLOCKCLSTATE();
5187         if (exclocked != 0)
5188                 nfsv4_unlock(&lyp->nfsly_lock, 0);
5189         else
5190                 nfsv4_relref(&lyp->nfsly_lock);
5191         NFSUNLOCKCLSTATE();
5192 }
5193
5194 /*
5195  * Search for a devinfo by deviceid. If one is found, return it after
5196  * acquiring a reference count on it.
5197  */
5198 struct nfscldevinfo *
5199 nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid,
5200     struct nfscldevinfo *dip)
5201 {
5202
5203         NFSLOCKCLSTATE();
5204         if (dip == NULL)
5205                 dip = nfscl_finddevinfo(clp, deviceid);
5206         if (dip != NULL)
5207                 dip->nfsdi_refcnt++;
5208         NFSUNLOCKCLSTATE();
5209         return (dip);
5210 }
5211
5212 /*
5213  * Dereference a devinfo structure.
5214  */
5215 static void
5216 nfscl_reldevinfo_locked(struct nfscldevinfo *dip)
5217 {
5218
5219         dip->nfsdi_refcnt--;
5220         if (dip->nfsdi_refcnt == 0)
5221                 wakeup(&dip->nfsdi_refcnt);
5222 }
5223
/*
 * Dereference a devinfo structure.
 * This is the variant for callers that do not already hold the NFSv4
 * client state mutex: it acquires the mutex around the call to
 * nfscl_reldevinfo_locked().
 */
void
nfscl_reldevinfo(struct nfscldevinfo *dip)
{

	NFSLOCKCLSTATE();
	nfscl_reldevinfo_locked(dip);
	NFSUNLOCKCLSTATE();
}
5235
5236 /*
5237  * Find a layout for this file handle. Return NULL upon failure.
5238  */
5239 static struct nfscllayout *
5240 nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
5241 {
5242         struct nfscllayout *lyp;
5243
5244         LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash)
5245                 if (lyp->nfsly_fhlen == fhlen &&
5246                     !NFSBCMP(lyp->nfsly_fh, fhp, fhlen))
5247                         break;
5248         return (lyp);
5249 }
5250
5251 /*
5252  * Find a devinfo for this deviceid. Return NULL upon failure.
5253  */
5254 static struct nfscldevinfo *
5255 nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid)
5256 {
5257         struct nfscldevinfo *dip;
5258
5259         LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list)
5260                 if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID)
5261                     == 0)
5262                         break;
5263         return (dip);
5264 }
5265
5266 /*
5267  * Merge the new file layout list into the main one, maintaining it in
5268  * increasing offset order.
5269  */
5270 static void
5271 nfscl_mergeflayouts(struct nfsclflayouthead *fhlp,
5272     struct nfsclflayouthead *newfhlp)
5273 {
5274         struct nfsclflayout *flp, *nflp, *prevflp, *tflp;
5275
5276         flp = LIST_FIRST(fhlp);
5277         prevflp = NULL;
5278         LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) {
5279                 while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) {
5280                         prevflp = flp;
5281                         flp = LIST_NEXT(flp, nfsfl_list);
5282                 }
5283                 if (prevflp == NULL)
5284                         LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list);
5285                 else
5286                         LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list);
5287                 prevflp = nflp;
5288         }
5289 }
5290
5291 /*
5292  * Add this nfscldevinfo to the client, if it doesn't already exist.
5293  * This function consumes the structure pointed at by dip, if not NULL.
5294  */
5295 int
5296 nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip, int ind,
5297     struct nfsclflayout *flp)
5298 {
5299         struct nfsclclient *clp;
5300         struct nfscldevinfo *tdip;
5301         uint8_t *dev;
5302
5303         NFSLOCKCLSTATE();
5304         clp = nmp->nm_clp;
5305         if (clp == NULL) {
5306                 NFSUNLOCKCLSTATE();
5307                 if (dip != NULL)
5308                         free(dip, M_NFSDEVINFO);
5309                 return (ENODEV);
5310         }
5311         if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5312                 dev = flp->nfsfl_dev;
5313         else
5314                 dev = flp->nfsfl_ffm[ind].dev;
5315         tdip = nfscl_finddevinfo(clp, dev);
5316         if (tdip != NULL) {
5317                 tdip->nfsdi_layoutrefs++;
5318                 if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5319                         flp->nfsfl_devp = tdip;
5320                 else
5321                         flp->nfsfl_ffm[ind].devp = tdip;
5322                 nfscl_reldevinfo_locked(tdip);
5323                 NFSUNLOCKCLSTATE();
5324                 if (dip != NULL)
5325                         free(dip, M_NFSDEVINFO);
5326                 return (0);
5327         }
5328         if (dip != NULL) {
5329                 LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list);
5330                 dip->nfsdi_layoutrefs = 1;
5331                 if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5332                         flp->nfsfl_devp = dip;
5333                 else
5334                         flp->nfsfl_ffm[ind].devp = dip;
5335         }
5336         NFSUNLOCKCLSTATE();
5337         if (dip == NULL)
5338                 return (ENODEV);
5339         return (0);
5340 }
5341
5342 /*
5343  * Free up a layout structure and associated file layout structure(s).
5344  */
5345 void
5346 nfscl_freelayout(struct nfscllayout *layp)
5347 {
5348         struct nfsclflayout *flp, *nflp;
5349         struct nfsclrecalllayout *rp, *nrp;
5350
5351         LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) {
5352                 LIST_REMOVE(flp, nfsfl_list);
5353                 nfscl_freeflayout(flp);
5354         }
5355         LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) {
5356                 LIST_REMOVE(flp, nfsfl_list);
5357                 nfscl_freeflayout(flp);
5358         }
5359         LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) {
5360                 LIST_REMOVE(rp, nfsrecly_list);
5361                 free(rp, M_NFSLAYRECALL);
5362         }
5363         nfscl_layoutcnt--;
5364         free(layp, M_NFSLAYOUT);
5365 }
5366
5367 /*
5368  * Free up a file layout structure.
5369  */
5370 void
5371 nfscl_freeflayout(struct nfsclflayout *flp)
5372 {
5373         int i, j;
5374
5375         if ((flp->nfsfl_flags & NFSFL_FILE) != 0) {
5376                 for (i = 0; i < flp->nfsfl_fhcnt; i++)
5377                         free(flp->nfsfl_fh[i], M_NFSFH);
5378                 if (flp->nfsfl_devp != NULL)
5379                         flp->nfsfl_devp->nfsdi_layoutrefs--;
5380         }
5381         if ((flp->nfsfl_flags & NFSFL_FLEXFILE) != 0)
5382                 for (i = 0; i < flp->nfsfl_mirrorcnt; i++) {
5383                         for (j = 0; j < flp->nfsfl_ffm[i].fhcnt; j++)
5384                                 free(flp->nfsfl_ffm[i].fh[j], M_NFSFH);
5385                         if (flp->nfsfl_ffm[i].devp != NULL)     
5386                                 flp->nfsfl_ffm[i].devp->nfsdi_layoutrefs--;     
5387                 }
5388         free(flp, M_NFSFLAYOUT);
5389 }
5390
/*
 * Free up a file layout devinfo structure.
 * Only the memory is released here; the structure is not unlinked from
 * any list and its reference counts are not examined.
 */
void
nfscl_freedevinfo(struct nfscldevinfo *dip)
{

	free(dip, M_NFSDEVINFO);
}
5400
5401 /*
5402  * Mark any layouts that match as recalled.
5403  */
5404 static int
5405 nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode,
5406     uint64_t off, uint64_t len, uint32_t stateseqid, uint32_t stat, uint32_t op,
5407     char *devid, struct nfsclrecalllayout *recallp)
5408 {
5409         struct nfsclrecalllayout *rp, *orp;
5410
5411         recallp->nfsrecly_recalltype = recalltype;
5412         recallp->nfsrecly_iomode = iomode;
5413         recallp->nfsrecly_stateseqid = stateseqid;
5414         recallp->nfsrecly_off = off;
5415         recallp->nfsrecly_len = len;
5416         recallp->nfsrecly_stat = stat;
5417         recallp->nfsrecly_op = op;
5418         if (devid != NULL)
5419                 NFSBCOPY(devid, recallp->nfsrecly_devid, NFSX_V4DEVICEID);
5420         /*
5421          * Order the list as file returns first, followed by fsid and any
5422          * returns, both in increasing stateseqid order.
5423          * Note that the seqids wrap around, so 1 is after 0xffffffff.
5424          * (I'm not sure this is correct because I find RFC5661 confusing
5425          *  on this, but hopefully it will work ok.)
5426          */
5427         orp = NULL;
5428         LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5429                 orp = rp;
5430                 if ((recalltype == NFSLAYOUTRETURN_FILE &&
5431                      (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE ||
5432                       nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) ||
5433                     (recalltype != NFSLAYOUTRETURN_FILE &&
5434                      rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE &&
5435                      nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) {
5436                         LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list);
5437                         break;
5438                 }
5439
5440                 /*
5441                  * Put any error return on all the file returns that will
5442                  * preceed this one.
5443                  */
5444                 if (rp->nfsrecly_recalltype == NFSLAYOUTRETURN_FILE &&
5445                    stat != 0 && rp->nfsrecly_stat == 0) {
5446                         rp->nfsrecly_stat = stat;
5447                         rp->nfsrecly_op = op;
5448                         if (devid != NULL)
5449                                 NFSBCOPY(devid, rp->nfsrecly_devid,
5450                                     NFSX_V4DEVICEID);
5451                 }
5452         }
5453         if (rp == NULL) {
5454                 if (orp == NULL)
5455                         LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp,
5456                             nfsrecly_list);
5457                 else
5458                         LIST_INSERT_AFTER(orp, recallp, nfsrecly_list);
5459         }
5460         lyp->nfsly_flags |= NFSLY_RECALL;
5461         wakeup(lyp->nfsly_clp);
5462         return (0);
5463 }
5464
/*
 * Compare the two seqids for ordering. The trick is that the seqids can
 * wrap around from 0xffffffff->0, so a numerically smaller seqid is
 * treated as the later one when the two are at least 0x7fffffff apart.
 * Return 1 if seqid1 comes before (or is equal to) seqid2, 0 otherwise.
 */
static int
nfscl_seq(uint32_t seqid1, uint32_t seqid2)
{
	const uint32_t wrapdist = 0x7fffffff;

	if (seqid1 == seqid2)
		return (1);
	if (seqid1 < seqid2) {
		/*
		 * Numerically before, unless the gap is so large that
		 * seqid1 must be the one that has wrapped around.
		 */
		return ((seqid2 - seqid1) < wrapdist ? 1 : 0);
	}
	/*
	 * Numerically after, unless the gap is so large that seqid2
	 * must be the one that has wrapped around.
	 */
	return ((seqid1 - seqid2) >= wrapdist ? 1 : 0);
}
5485
5486 /*
5487  * Do a layout return for each of the recalls.
5488  */
5489 static void
5490 nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp,
5491     struct ucred *cred, NFSPROC_T *p)
5492 {
5493         struct nfsclrecalllayout *rp;
5494         nfsv4stateid_t stateid;
5495         int layouttype;
5496
5497         NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER);
5498         stateid.seqid = lyp->nfsly_stateid.seqid;
5499         if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5500                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5501         else
5502                 layouttype = NFSLAYOUT_FLEXFILE;
5503         LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5504                 (void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh,
5505                     lyp->nfsly_fhlen, 0, layouttype,
5506                     rp->nfsrecly_iomode, rp->nfsrecly_recalltype,
5507                     rp->nfsrecly_off, rp->nfsrecly_len,
5508                     &stateid, cred, p, rp->nfsrecly_stat, rp->nfsrecly_op,
5509                     rp->nfsrecly_devid);
5510         }
5511 }
5512
5513 /*
5514  * Do the layout commit for a file layout.
5515  */
5516 static void
5517 nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp,
5518     struct ucred *cred, NFSPROC_T *p)
5519 {
5520         struct nfsclflayout *flp;
5521         uint64_t len;
5522         int error, layouttype;
5523
5524         if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5525                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5526         else
5527                 layouttype = NFSLAYOUT_FLEXFILE;
5528         LIST_FOREACH(flp, &lyp->nfsly_flayrw, nfsfl_list) {
5529                 if (layouttype == NFSLAYOUT_FLEXFILE &&
5530                     (flp->nfsfl_fflags & NFSFLEXFLAG_NO_LAYOUTCOMMIT) != 0) {
5531                         NFSCL_DEBUG(4, "Flex file: no layoutcommit\n");
5532                         /* If not supported, don't bother doing it. */
5533                         NFSLOCKMNT(nmp);
5534                         nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5535                         NFSUNLOCKMNT(nmp);
5536                         break;
5537                 } else if (flp->nfsfl_off <= lyp->nfsly_lastbyte) {
5538                         len = flp->nfsfl_end - flp->nfsfl_off;
5539                         error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh,
5540                             lyp->nfsly_fhlen, 0, flp->nfsfl_off, len,
5541                             lyp->nfsly_lastbyte, &lyp->nfsly_stateid,
5542                             layouttype, cred, p, NULL);
5543                         NFSCL_DEBUG(4, "layoutcommit err=%d\n", error);
5544                         if (error == NFSERR_NOTSUPP) {
5545                                 /* If not supported, don't bother doing it. */
5546                                 NFSLOCKMNT(nmp);
5547                                 nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5548                                 NFSUNLOCKMNT(nmp);
5549                                 break;
5550                         }
5551                 }
5552         }
5553 }
5554
5555 /*
5556  * Commit all layouts for a file (vnode).
5557  */
5558 int
5559 nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p)
5560 {
5561         struct nfsclclient *clp;
5562         struct nfscllayout *lyp;
5563         struct nfsnode *np = VTONFS(vp);
5564         mount_t mp;
5565         struct nfsmount *nmp;
5566
5567         mp = vp->v_mount;
5568         nmp = VFSTONFS(mp);
5569         if (NFSHASNOLAYOUTCOMMIT(nmp))
5570                 return (0);
5571         NFSLOCKCLSTATE();
5572         clp = nmp->nm_clp;
5573         if (clp == NULL) {
5574                 NFSUNLOCKCLSTATE();
5575                 return (EPERM);
5576         }
5577         lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
5578         if (lyp == NULL) {
5579                 NFSUNLOCKCLSTATE();
5580                 return (EPERM);
5581         }
5582         nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
5583         if (NFSCL_FORCEDISM(mp)) {
5584                 NFSUNLOCKCLSTATE();
5585                 return (EPERM);
5586         }
5587 tryagain:
5588         if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
5589                 lyp->nfsly_flags &= ~NFSLY_WRITTEN;
5590                 NFSUNLOCKCLSTATE();
5591                 NFSCL_DEBUG(4, "do layoutcommit2\n");
5592                 nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p);
5593                 NFSLOCKCLSTATE();
5594                 goto tryagain;
5595         }
5596         nfsv4_relref(&lyp->nfsly_lock);
5597         NFSUNLOCKCLSTATE();
5598         return (0);
5599 }