]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/nfsclient/nfs_clstate.c
zfs: merge openzfs/zfs@14b43fbd9 (master) into main
[FreeBSD/FreeBSD.git] / sys / fs / nfsclient / nfs_clstate.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2009 Rick Macklem, University of Guelph
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 /*
34  * These functions implement the client side state handling for NFSv4.
35  * NFSv4 state handling:
36  * - A lockowner is used to determine lock contention, so it
37  *   corresponds directly to a Posix pid. (1 to 1 mapping)
38  * - The correct granularity of an OpenOwner is not nearly so
39  *   obvious. An OpenOwner does the following:
40  *   - provides a serial sequencing of Open/Close/Lock-with-new-lockowner
41  *   - is used to check for Open/Share contention (not applicable to
42  *     this client, since all Opens are Deny_None)
43  *   As such, I considered both extremes.
44  *   1 OpenOwner per ClientID - Simple to manage, but fully serializes
45  *   all Open, Close and Lock (with a new lockowner) Ops.
46  *   1 OpenOwner for each Open - This one results in an OpenConfirm for
47  *   every Open, for most servers.
48  *   So, I chose to use the same mapping as I did for LockOwners.
49  *   The main concern here is that you can end up with multiple Opens
50  *   for the same File Handle, but on different OpenOwners (opens
51  *   inherited from parents, grandparents...) and you do not know
52  *   which of these the vnodeop close applies to. This is handled by
53  *   delaying the Close Op(s) until all of the Opens have been closed.
54  *   (It is not yet obvious if this is the correct granularity.)
55  * - How the code handles serialization:
56  *   - For the ClientId, it uses an exclusive lock while getting its
57  *     SetClientId and during recovery. Otherwise, it uses a shared
58  *     lock via a reference count.
59  *   - For the rest of the data structures, it uses an SMP mutex
60  *     (once the nfs client is SMP safe) and doesn't sleep while
61  *     manipulating the linked lists.
62  *   - The serialization of Open/Close/Lock/LockU falls out in the
63  *     "wash", since OpenOwners and LockOwners are both mapped from
64  *     Posix pid. In other words, there is only one Posix pid using
65  *     any given owner, so that owner is serialized. (If you change
66  *     the granularity of the OpenOwner, then code must be added to
67  *     serialize Ops on the OpenOwner.)
68  * - When to get rid of OpenOwners and LockOwners.
69  *   - The function nfscl_cleanup_common() is executed after a process exits.
70  *     It goes through the client list looking for all Open and Lock Owners.
71  *     When one is found, it is marked "defunct" or in the case of
72  *     an OpenOwner without any Opens, freed.
73  *     The renew thread scans for defunct Owners and gets rid of them,
74  *     if it can. The LockOwners will also be deleted when the
75  *     associated Open is closed.
76  *   - If the LockU or Close Op(s) fail during close in a way
77  *     that could be recovered upon retry, they are relinked to the
78  *     ClientId's defunct open list and retried by the renew thread
79  *     until they succeed or an unmount/recovery occurs.
80  *     (Since we are done with them, they do not need to be recovered.)
81  */
82
83 #include <fs/nfs/nfsport.h>
84
85 /*
86  * Global variables
    *
    * The first group is declared extern here.  NFSREQSPINLOCK and
    * NFSCLSTATEMUTEX are macros supplied by the included NFS headers.
    * NOTE(review): presumably NFSCLSTATEMUTEX declares the mutex taken by
    * NFSLOCKCLSTATE()/NFSUNLOCKCLSTATE() in the functions below -- confirm
    * against the header that defines it.
87  */
88 extern struct nfsstatsv1 nfsstatsv1;
89 extern struct nfsreqhead nfsd_reqq;
90 extern u_int32_t newnfs_false, newnfs_true;
91 extern int nfscl_debuglevel;
92 extern int nfscl_enablecallb;
93 extern int nfs_numnfscbd;
94 NFSREQSPINLOCK;
95 NFSCLSTATEMUTEX;
96 int nfscl_inited = 0;
97 struct nfsclhead nfsclhead;     /* Head of clientid list */
   /* Thresholds initialized from the NFSCL*HIGHWATER constants. */
98 int nfscl_deleghighwater = NFSCLDELEGHIGHWATER;
99 int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER;
100
    /*
     * nfscl_delegcnt is incremented by nfscl_deleg() each time a delegation
     * is linked onto a client's lists.
     * NOTE(review): nfscl_layoutcnt is presumably the analogous layout count;
     * no use of it is visible in this section.
     */
101 static int nfscl_delegcnt = 0;
102 static int nfscl_layoutcnt = 0;
103 static int nfscl_getopen(struct nfsclownerhead *, struct nfsclopenhash *,
104     u_int8_t *, int, u_int8_t *, u_int8_t *, u_int32_t,
105     struct nfscllockowner **, struct nfsclopen **);
106 static bool nfscl_checkown(struct nfsclowner *, struct nfsclopen *, uint8_t *,
107     uint8_t *, struct nfscllockowner **, struct nfsclopen **,
108     struct nfsclopen **);
109 static void nfscl_clrelease(struct nfsclclient *);
110 static void nfscl_cleanclient(struct nfsclclient *);
111 static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *,
112     struct ucred *, NFSPROC_T *);
113 static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *,
114     struct nfsmount *, struct ucred *, NFSPROC_T *);
115 static void nfscl_recover(struct nfsclclient *, bool *, struct ucred *,
116     NFSPROC_T *);
117 static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *,
118     struct nfscllock *, int);
119 static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **,
120     struct nfscllock **, int);
121 static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *);
122 static u_int32_t nfscl_nextcbident(void);
123 static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **);
124 static struct nfsclclient *nfscl_getclnt(u_int32_t);
125 static struct nfsclclient *nfscl_getclntsess(uint8_t *);
126 static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *,
127     int);
128 static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *,
129     int, struct nfsclrecalllayout **);
130 static void nfscl_reldevinfo_locked(struct nfscldevinfo *);
131 static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *,
132     int);
133 static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *);
134 static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *,
135     u_int8_t *, struct nfscllock **);
136 static void nfscl_freealllocks(struct nfscllockownerhead *, int);
137 static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int,
138     struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **);
139 static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *,
140     struct nfsclowner **, struct nfsclowner **, struct nfsclopen **,
141     struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *);
142 static int nfscl_moveopen(vnode_t , struct nfsclclient *,
143     struct nfsmount *, struct nfsclopen *, struct nfsclowner *,
144     struct nfscldeleg *, struct ucred *, NFSPROC_T *);
145 static void nfscl_totalrecall(struct nfsclclient *);
146 static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *,
147     struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *);
148 static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int,
149     u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int,
150     struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *);
151 static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *,
152     int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short,
153     struct ucred *, NFSPROC_T *);
154 static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t,
155     struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *);
156 static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *,
157     bool);
158 static int nfscl_errmap(struct nfsrv_descript *, u_int32_t);
159 static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *);
160 static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *,
161     struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int,
162     vnode_t *);
163 static void nfscl_freeopenowner(struct nfsclowner *, int);
164 static void nfscl_cleandeleg(struct nfscldeleg *);
165 static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *,
166     struct nfsmount *, NFSPROC_T *);
167 static void nfscl_emptylockowner(struct nfscllockowner *,
168     struct nfscllockownerfhhead *);
169 static void nfscl_mergeflayouts(struct nfsclflayouthead *,
170     struct nfsclflayouthead *);
171 static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t,
172     uint64_t, uint32_t, uint32_t, uint32_t, char *, struct nfsclrecalllayout *);
173 static int nfscl_seq(uint32_t, uint32_t);
174 static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *,
175     struct ucred *, NFSPROC_T *);
176 static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *,
177     struct ucred *, NFSPROC_T *);
178
/*
 * Per-callback error tables.  Each nfscberr_* array is a list of NFSERR_*
 * codes ending in 0, and nfscl_cberrmap[] collects them: three entries
 * pointing at the null table, then the getattr table, then the recall table.
 * NOTE(review): presumably consumed by nfscl_errmap() and indexed by callback
 * operation number, with each table's first entry acting as the fallback
 * error -- that function is not visible in this section; confirm there.
 */
179 static short nfscberr_null[] = {
180         0,
181         0,
182 };
183
184 static short nfscberr_getattr[] = {
185         NFSERR_RESOURCE,
186         NFSERR_BADHANDLE,
187         NFSERR_BADXDR,
188         NFSERR_RESOURCE,
189         NFSERR_SERVERFAULT,
190         0,
191 };
192
193 static short nfscberr_recall[] = {
194         NFSERR_RESOURCE,
195         NFSERR_BADHANDLE,
196         NFSERR_BADSTATEID,
197         NFSERR_BADXDR,
198         NFSERR_RESOURCE,
199         NFSERR_SERVERFAULT,
200         0,
201 };
202
203 static short *nfscl_cberrmap[] = {
204         nfscberr_null,
205         nfscberr_null,
206         nfscberr_null,
207         nfscberr_getattr,
208         nfscberr_recall
209 };
210
/*
 * Address family for a client: AF_INET6 when NFSCLFLAGS_AFINET6 is set in
 * clp->nfsc_flags, otherwise AF_INET.
 */
211 #define NETFAMILY(clp) \
212                 (((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) ? AF_INET6 : AF_INET)
213
214 /*
215  * Called for an open operation.
216  * If the nfhp argument is NULL, just get an openowner.
     *
     * Looks up (or creates, from structures preallocated here) the open owner
     * and, when nfhp is non-NULL, the open for that file handle.
     *
     * vp        - vnode being opened; its mount selects the client and the
     *             single-open-owner option.
     * nfhp      - file handle bytes, or NULL to get only an openowner.
     * fhlen     - length of nfhp in bytes.
     * amode     - requested access bits; any bits not already in the open's
     *             nfso_mode are OR'd into it.
     * usedeleg  - when non-zero (and nfhp != NULL) the delegation hash is
     *             searched first; a matching delegation is used only if the
     *             open is not for writing or it has NFSCLDL_WRITE set.
     * cred, p   - passed to nfscl_getcl(); cred is also handed to
     *             nfscl_newopen(), and p->td_proc names the owner unless the
     *             mount uses a single open owner.
     * owpp, opp - optional out parameters for the owner and open found/created.
     * newonep   - optional; cleared here, set to 1 by nfscl_newopen() when it
     *             links in a new owner or open.
     * retp      - optional; receives NFSCLOPEN_SETCRED for a new local open on
     *             a delegation, NFSCLOPEN_DOOPEN when the open lacked some of
     *             the requested mode bits and no delegation is in use, else
     *             NFSCLOPEN_OK.
     * lockit    - when non-zero the owner's nfsow_rwlock is acquired before
     *             returning (shared for a single open owner, else exclusive).
     *
     * Returns 0, or the error from nfscl_getcl(); on that error path the
     * state mutex was never taken and both preallocations have been freed.
217  */
218 int
219 nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
220     struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp,
221     struct nfsclopen **opp, int *newonep, int *retp, int lockit)
222 {
223         struct nfsclclient *clp;
224         struct nfsclowner *owp, *nowp;
225         struct nfsclopen *op = NULL, *nop = NULL;
226         struct nfscldeleg *dp;
227         struct nfsclownerhead *ohp;
228         u_int8_t own[NFSV4CL_LOCKNAMELEN];
229         int ret;
230
231         if (newonep != NULL)
232                 *newonep = 0;
233         if (opp != NULL)
234                 *opp = NULL;
235         if (owpp != NULL)
236                 *owpp = NULL;
237
238         /*
239          * Might need one or both of these, so MALLOC them now, to
240          * avoid a tsleep() in MALLOC later.
241          */
242         nowp = malloc(sizeof (struct nfsclowner),
243             M_NFSCLOWNER, M_WAITOK);
244         if (nfhp != NULL) {
                /*
                 * NOTE(review): the "+ fhlen - 1" sizing presumably relies on
                 * nfso_fh being a one-byte trailing array, and the NULL
                 * le_prev presumably marks "not on a hash chain" -- confirm
                 * against the struct nfsclopen definition.
                 */
245             nop = malloc(sizeof (struct nfsclopen) +
246                 fhlen - 1, M_NFSCLOPEN, M_WAITOK);
247             nop->nfso_hash.le_prev = NULL;
248         }
249         ret = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
250         if (ret != 0) {
251                 free(nowp, M_NFSCLOWNER);
252                 if (nop != NULL)
253                         free(nop, M_NFSCLOPEN);
254                 return (ret);
255         }
256
257         /*
258          * Get the Open iff it already exists.
259          * If none found, add the new one or return error, depending upon
260          * "create".
261          */
262         NFSLOCKCLSTATE();
263         dp = NULL;
264         /* First check the delegation list */
265         if (nfhp != NULL && usedeleg) {
266                 LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
267                         if (dp->nfsdl_fhlen == fhlen &&
268                             !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
269                                 if (!(amode & NFSV4OPEN_ACCESSWRITE) ||
270                                     (dp->nfsdl_flags & NFSCLDL_WRITE))
271                                         break;
                                    /* Matching FH but unusable for a write open. */
272                                 dp = NULL;
273                                 break;
274                         }
275                 }
276         }
277
278         /* For NFSv4.1/4.2 and this option, use a single open_owner. */
279         if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
280                 nfscl_filllockowner(NULL, own, F_POSIX);
281         else
282                 nfscl_filllockowner(p->td_proc, own, F_POSIX);
            /* Local opens hang off the delegation; others off the client. */
283         if (dp != NULL)
284                 ohp = &dp->nfsdl_owner;
285         else
286                 ohp = &clp->nfsc_owner;
287         /* Now, search for an openowner */
288         LIST_FOREACH(owp, ohp, nfsow_list) {
289                 if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN))
290                         break;
291         }
292
293         /*
294          * Create a new open, as required.
295          */
296         nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen,
297             cred, newonep);
298
299         /*
300          * Now, check the mode on the open and return the appropriate
301          * value.
302          */
303         if (retp != NULL) {
                    /* nop == NULL here means nfscl_newopen() consumed it. */
304                 if (nfhp != NULL && dp != NULL && nop == NULL)
305                         /* new local open on delegation */
306                         *retp = NFSCLOPEN_SETCRED;
307                 else
308                         *retp = NFSCLOPEN_OK;
309         }
310         if (op != NULL && (amode & ~(op->nfso_mode))) {
311                 op->nfso_mode |= amode;
312                 if (retp != NULL && dp == NULL)
313                         *retp = NFSCLOPEN_DOOPEN;
314         }
315
316         /*
317          * Serialize modifications to the open owner for multiple threads
318          * within the same process using a read/write sleep lock.
319          * For NFSv4.1 and a single OpenOwner, allow concurrent open operations
320          * by acquiring a shared lock.  The close operations still use an
321          * exclusive lock for this case.
322          */
323         if (lockit != 0) {
324                 if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount))) {
325                         /*
326                          * Get a shared lock on the OpenOwner, but first
327                          * wait for any pending exclusive lock, so that the
328                          * exclusive locker gets priority.
329                          */
330                         nfsv4_lock(&owp->nfsow_rwlock, 0, NULL,
331                             NFSCLSTATEMUTEXPTR, NULL);
332                         nfsv4_getref(&owp->nfsow_rwlock, NULL,
333                             NFSCLSTATEMUTEXPTR, NULL);
334                 } else
335                         nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
336         }
337         NFSUNLOCKCLSTATE();
            /*
             * nfscl_newopen() sets these to NULL when it links them in, so
             * anything still non-NULL was not needed and is released here.
             */
338         if (nowp != NULL)
339                 free(nowp, M_NFSCLOWNER);
340         if (nop != NULL)
341                 free(nop, M_NFSCLOPEN);
342         if (owpp != NULL)
343                 *owpp = owp;
344         if (opp != NULL)
345                 *opp = op;
346         return (0);
347 }
348
349 /*
350  * Create a new open, as required.
     *
     * Owner: if *owpp is NULL and a preallocated owner (*nowpp) was supplied,
     * that owner is initialized with the name in own and linked at the head of
     * the delegation's owner list (dp != NULL) or the client's owner list.
     * *owpp is updated and *nowpp is set to NULL to show it was consumed.
     *
     * Open: if fhp is non-NULL the owner's open list is searched for an open
     * with the same file handle.  When none exists and a preallocated open
     * (*nopp) was supplied, it is initialized (mode 0, zeroed stateid, a copy
     * of cred) and linked; *opp gets the new open and *nopp is set to NULL.
     * Otherwise *opp gets the result of the search, which may be NULL.
     * With fhp == NULL, *opp is left untouched.
     *
     * For a delegation (dp != NULL) a new open is not put on the client's open
     * hash; instead the delegation is moved to the head of clp->nfsc_deleg and
     * its timestamp is set to NFSD_MONOSEC + 120.
     *
     * nowpp and nopp may themselves be NULL.  *newonep (if newonep is
     * non-NULL) is set to 1 whenever a new owner or open is linked.
     * nfscl_open() calls this with the client state mutex held.
     * NOTE(review): the fhp != NULL path dereferences owp, so it assumes the
     * caller passed either an existing owner or a preallocated one.
351  */
352 static void
353 nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp,
354     struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp,
355     struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen,
356     struct ucred *cred, int *newonep)
357 {
358         struct nfsclowner *owp = *owpp, *nowp;
359         struct nfsclopen *op, *nop;
360
361         if (nowpp != NULL)
362                 nowp = *nowpp;
363         else
364                 nowp = NULL;
365         if (nopp != NULL)
366                 nop = *nopp;
367         else
368                 nop = NULL;
369         if (owp == NULL && nowp != NULL) {
370                 NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
371                 LIST_INIT(&nowp->nfsow_open);
372                 nowp->nfsow_clp = clp;
373                 nowp->nfsow_seqid = 0;
374                 nowp->nfsow_defunct = 0;
375                 nfscl_lockinit(&nowp->nfsow_rwlock);
376                 if (dp != NULL) {
377                         nfsstatsv1.cllocalopenowners++;
378                         LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list);
379                 } else {
380                         nfsstatsv1.clopenowners++;
381                         LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list);
382                 }
                    /* The preallocated owner is now owned by the list. */
383                 owp = *owpp = nowp;
384                 *nowpp = NULL;
385                 if (newonep != NULL)
386                         *newonep = 1;
387         }
388
389          /* If an fhp has been specified, create an Open as well. */
390         if (fhp != NULL) {
391                 /* and look for the correct open, based upon FH */
392                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
393                         if (op->nfso_fhlen == fhlen &&
394                             !NFSBCMP(op->nfso_fh, fhp, fhlen))
395                                 break;
396                 }
397                 if (op == NULL && nop != NULL) {
398                         nop->nfso_own = owp;
399                         nop->nfso_mode = 0;
400                         nop->nfso_opencnt = 0;
401                         nop->nfso_posixlock = 1;
402                         nop->nfso_fhlen = fhlen;
403                         NFSBCOPY(fhp, nop->nfso_fh, fhlen);
404                         LIST_INIT(&nop->nfso_lock);
405                         nop->nfso_stateid.seqid = 0;
406                         nop->nfso_stateid.other[0] = 0;
407                         nop->nfso_stateid.other[1] = 0;
408                         nop->nfso_stateid.other[2] = 0;
409                         KASSERT(cred != NULL, ("%s: cred NULL\n", __func__));
410                         newnfs_copyincred(cred, &nop->nfso_cred);
411                         if (dp != NULL) {
                                    /* Move the delegation to the list head. */
412                                 TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
413                                 TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
414                                     nfsdl_list);
415                                 dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
416                                 nfsstatsv1.cllocalopens++;
417                         } else {
418                                 LIST_INSERT_HEAD(NFSCLOPENHASH(clp, fhp, fhlen),
419                                     nop, nfso_hash);
420                                 nfsstatsv1.clopens++;
421                         }
422                         LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list);
                            /* The preallocated open is now owned by the lists. */
423                         *opp = nop;
424                         *nopp = NULL;
425                         if (newonep != NULL)
426                                 *newonep = 1;
427                 } else {
428                         *opp = op;
429                 }
430         }
431 }
432
433 /*
434  * Called to find/add a delegation to a client.
     *
     * On entry *dpp is either a newly received delegation to be added, or NULL
     * to just look one up by file handle.  Outcomes that return 0:
     *  - a new read delegation on a mount that is not read-only is passed to
     *    nfscl_trydelegreturn() and freed; *dpp becomes NULL;
     *  - a new delegation with no existing entry for the file handle is linked
     *    at the head of clp->nfsc_deleg and onto the delegation hash, and the
     *    counters are bumped; *dpp becomes NULL because the lists now own it;
     *  - a new delegation when an entry already exists is freed after a
     *    printf(); *dpp becomes NULL;
     *  - a lookup that finds an entry returns it in *dpp.
     * A lookup that finds nothing returns NFSERR_BADSTATEID.
     *
     * mp must be non-NULL (asserted).  NFSMNTP_DELEGISSUED is set on the mount
     * in every case except the early read-delegation return.
435  */
436 int
437 nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
438     int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp)
439 {
440         struct nfscldeleg *dp = *dpp, *tdp;
441         struct nfsmount *nmp;
442
443         KASSERT(mp != NULL, ("nfscl_deleg: mp NULL"));
444         nmp = VFSTONFS(mp);
445         /*
446          * First, if we have received a Read delegation for a file on a
447          * read/write file system, just return it, because they aren't
448          * useful, imho.
449          */
450         if (dp != NULL && !NFSMNT_RDONLY(mp) &&
451             (dp->nfsdl_flags & NFSCLDL_READ)) {
452                 nfscl_trydelegreturn(dp, cred, nmp, p);
453                 free(dp, M_NFSCLDELEG);
454                 *dpp = NULL;
455                 return (0);
456         }
457
458         /*
459          * Since a delegation might be added to the mount,
460          * set NFSMNTP_DELEGISSUED now.  If a delegation already
461          * exists, setting this flag is harmless.
462          */
463         NFSLOCKMNT(nmp);
464         nmp->nm_privflag |= NFSMNTP_DELEGISSUED;
465         NFSUNLOCKMNT(nmp);
466
467         /* Look for the correct deleg, based upon FH */
468         NFSLOCKCLSTATE();
469         tdp = nfscl_finddeleg(clp, nfhp, fhlen);
470         if (tdp == NULL) {
471                 if (dp == NULL) {
                            /* Lookup only, and nothing is there. */
472                         NFSUNLOCKCLSTATE();
473                         return (NFSERR_BADSTATEID);
474                 }
                    /* Hand the new delegation over to the client's lists. */
475                 *dpp = NULL;
476                 TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
477                 LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp,
478                     nfsdl_hash);
479                 dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
480                 nfsstatsv1.cldelegates++;
481                 nfscl_delegcnt++;
482         } else {
483                 /*
484                  * Delegation already exists, what do we do if a new one??
485                  */
486                 if (dp != NULL) {
487                         printf("Deleg already exists!\n");
488                         free(dp, M_NFSCLDELEG);
489                         *dpp = NULL;
490                 } else {
491                         *dpp = tdp;
492                 }
493         }
494         NFSUNLOCKCLSTATE();
495         return (0);
496 }
497
498 /*
499  * Walk the hash chain for fhp and hand back the matching delegation, or NULL.
500  */
501 static struct nfscldeleg *
502 nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
503 {
504         struct nfscldeleg *cand;
505
506         LIST_FOREACH(cand, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) {
507             if (cand->nfsdl_fhlen == fhlen &&
508                 NFSBCMP(cand->nfsdl_fh, fhp, fhlen) == 0)
509                 return (cand);
510         }
511         return (NULL);
512 }
513
514 /*
515  * Get a stateid for an I/O operation. First, look for an open and iff
516  * found, return either a lockowner stateid or the open stateid.
517  * If no Open is found, just return error and the special stateid of all zeros.
     *
     * Search order, all under the client state mutex and after waiting for any
     * recovery in progress (NFSCLFLAGS_RECVRINPROG) to finish:
     *  1. a delegation for the file handle that permits "mode" (a write
     *     request needs NFSCLDL_WRITE);
     *  2. when p != NULL, nfscl_getopen() using this process's owner name; a
     *     lock owner stateid from it is returned only when fords == 0;
     *  3. if no open was set by step 2, any open on the file handle's hash
     *     chain whose nfso_mode covers "mode"; failing that, an open with
     *     write access may stand in when "mode" includes read access, unless
     *     NFSHASOPENMODE(nmp) is set.
     *
     * fords    - non-zero for a DS: *stateidp is not pre-zeroed and a lock
     *            stateid is never returned.
     * cred     - when p == NULL (read ahead / write behind) and the open came
     *            from step 3, the open's credentials are copied into it.
     * stateidp - receives the stateid; seqid is forced to 0 on NFSHASNFSV4N
     *            mounts.
     * lckpp    - set to NULL, or to &dp->nfsdl_rwlock (after incrementing its
     *            nfslock_usecnt) when a delegation stateid is returned and the
     *            node does not have NDELEGRECALL set.
     *            NOTE(review): the caller presumably has to drop that use
     *            count later -- the release path is not in this section.
     *
     * Returns 0 on success, EISDIR if vp is not a regular file, EACCES if the
     * mount has no client, ENOENT if no usable open exists.
518  */
519 int
520 nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
521     int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp,
522     void **lckpp)
523 {
524         struct nfsclclient *clp;
525         struct nfsclopen *op = NULL, *top;
526         struct nfsclopenhash *oph;
527         struct nfscllockowner *lp;
528         struct nfscldeleg *dp;
529         struct nfsnode *np;
530         struct nfsmount *nmp;
531         u_int8_t own[NFSV4CL_LOCKNAMELEN];
532         int error;
533         bool done;
534
535         *lckpp = NULL;
536         /*
537          * Initially, just set the special stateid of all zeros.
538          * (Don't do this for a DS, since the special stateid can't be used.)
539          */
540         if (fords == 0) {
541                 stateidp->seqid = 0;
542                 stateidp->other[0] = 0;
543                 stateidp->other[1] = 0;
544                 stateidp->other[2] = 0;
545         }
546         if (vnode_vtype(vp) != VREG)
547                 return (EISDIR);
548         np = VTONFS(vp);
549         nmp = VFSTONFS(vp->v_mount);
550         NFSLOCKCLSTATE();
551         clp = nfscl_findcl(nmp);
552         if (clp == NULL) {
553                 NFSUNLOCKCLSTATE();
554                 return (EACCES);
555         }
556
557         /*
558          * Wait for recovery to complete.
559          */
560         while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG))
561                 (void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR,
562                     PZERO, "nfsrecvr", NULL);
563
564         /*
565          * First, look for a delegation.
566          */
567         LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
568                 if (dp->nfsdl_fhlen == fhlen &&
569                     !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
570                         if (!(mode & NFSV4OPEN_ACCESSWRITE) ||
571                             (dp->nfsdl_flags & NFSCLDL_WRITE)) {
572                                 if (NFSHASNFSV4N(nmp))
573                                         stateidp->seqid = 0;
574                                 else
575                                         stateidp->seqid =
576                                             dp->nfsdl_stateid.seqid;
577                                 stateidp->other[0] = dp->nfsdl_stateid.other[0];
578                                 stateidp->other[1] = dp->nfsdl_stateid.other[1];
579                                 stateidp->other[2] = dp->nfsdl_stateid.other[2];
580                                 if (!(np->n_flag & NDELEGRECALL)) {
                                            /*
                                             * Not being recalled: move it to
                                             * the list head, refresh its
                                             * timestamp and take a use count.
                                             */
581                                         TAILQ_REMOVE(&clp->nfsc_deleg, dp,
582                                             nfsdl_list);
583                                         TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
584                                             nfsdl_list);
585                                         dp->nfsdl_timestamp = NFSD_MONOSEC +
586                                             120;
587                                         dp->nfsdl_rwlock.nfslock_usecnt++;
588                                         *lckpp = (void *)&dp->nfsdl_rwlock;
589                                 }
590                                 NFSUNLOCKCLSTATE();
591                                 return (0);
592                         }
                            /* Right file handle, but it cannot satisfy a write. */
593                         break;
594                 }
595         }
596
597         if (p != NULL) {
598                 /*
599                  * If p != NULL, we want to search the parentage tree
600                  * for a matching OpenOwner and use that.
601                  */
602                 if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
603                         nfscl_filllockowner(NULL, own, F_POSIX);
604                 else
605                         nfscl_filllockowner(p->td_proc, own, F_POSIX);
606                 lp = NULL;
607                 error = nfscl_getopen(NULL, clp->nfsc_openhash, nfhp, fhlen,
608                     own, own, mode, &lp, &op);
609                 if (error == 0 && lp != NULL && fords == 0) {
610                         /* Don't return a lock stateid for a DS. */
611                         if (NFSHASNFSV4N(nmp))
612                                 stateidp->seqid = 0;
613                         else
614                                 stateidp->seqid = lp->nfsl_stateid.seqid;
615                         stateidp->other[0] =
616                             lp->nfsl_stateid.other[0];
617                         stateidp->other[1] =
618                             lp->nfsl_stateid.other[1];
619                         stateidp->other[2] =
620                             lp->nfsl_stateid.other[2];
621                         NFSUNLOCKCLSTATE();
622                         return (0);
623                 }
624         }
625         if (op == NULL) {
626                 /* If not found, just look for any OpenOwner that will work. */
627                 top = NULL;
628                 done = false;
629                 oph = NFSCLOPENHASH(clp, nfhp, fhlen);
630                 LIST_FOREACH(op, oph, nfso_hash) {
631                         if (op->nfso_fhlen == fhlen &&
632                             !NFSBCMP(op->nfso_fh, nfhp, fhlen)) {
                                    /*
                                     * Remember the first write-mode open as a
                                     * fallback for a request that reads.
                                     */
633                                 if (top == NULL && (op->nfso_mode &
634                                     NFSV4OPEN_ACCESSWRITE) != 0 &&
635                                     (mode & NFSV4OPEN_ACCESSREAD) != 0)
636                                         top = op;
637                                 if ((mode & op->nfso_mode) == mode) {
638                                         /* LRU order the hash list. */
639                                         LIST_REMOVE(op, nfso_hash);
640                                         LIST_INSERT_HEAD(oph, op, nfso_hash);
641                                         done = true;
642                                         break;
643                                 }
644                         }
645                 }
646                 if (!done) {
647                         NFSCL_DEBUG(2, "openmode top=%p\n", top);
648                         if (top == NULL || NFSHASOPENMODE(nmp)) {
649                                 NFSUNLOCKCLSTATE();
650                                 return (ENOENT);
651                         } else
652                                 op = top;
653                 }
654                 /*
655                  * For read aheads or write behinds, use the open cred.
656                  * A read ahead or write behind is indicated by p == NULL.
657                  */
658                 if (p == NULL)
659                         newnfs_copycred(&op->nfso_cred, cred);
660         }
661
662         /*
663          * No lock stateid, so return the open stateid.
664          */
665         if (NFSHASNFSV4N(nmp))
666                 stateidp->seqid = 0;
667         else
668                 stateidp->seqid = op->nfso_stateid.seqid;
669         stateidp->other[0] = op->nfso_stateid.other[0];
670         stateidp->other[1] = op->nfso_stateid.other[1];
671         stateidp->other[2] = op->nfso_stateid.other[2];
672         NFSUNLOCKCLSTATE();
673         return (0);
674 }
675
676 /*
677  * Search for a matching file, mode and, optionally, lockowner.
678  */
679 static int
680 nfscl_getopen(struct nfsclownerhead *ohp, struct nfsclopenhash *ohashp,
681     u_int8_t *nfhp, int fhlen, u_int8_t *openown, u_int8_t *lockown,
682     u_int32_t mode, struct nfscllockowner **lpp, struct nfsclopen **opp)
683 {
684         struct nfsclowner *owp;
685         struct nfsclopen *op, *rop, *rop2;
686         struct nfsclopenhash *oph;
687         bool keep_looping;
688
689         KASSERT(ohp == NULL || ohashp == NULL, ("nfscl_getopen: "
690             "only one of ohp and ohashp can be set"));
691         if (lpp != NULL)
692                 *lpp = NULL;
693         /*
694          * rop will be set to the open to be returned. There are three
695          * variants of this, all for an open of the correct file:
696          * 1 - A match of lockown.
697          * 2 - A match of the openown, when no lockown match exists.
698          * 3 - A match for any open, if no openown or lockown match exists.
699          * Looking for #2 over #3 probably isn't necessary, but since
700          * RFC3530 is vague w.r.t. the relationship between openowners and
701          * lockowners, I think this is the safer way to go.
702          */
703         rop = NULL;
704         rop2 = NULL;
705         keep_looping = true;
706         /* Search the client list */
707         if (ohashp == NULL) {
708                 /* Search the local opens on the delegation. */
709                 LIST_FOREACH(owp, ohp, nfsow_list) {
710                         /* and look for the correct open */
711                         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
712                                 if (op->nfso_fhlen == fhlen &&
713                                     !NFSBCMP(op->nfso_fh, nfhp, fhlen)
714                                     && (op->nfso_mode & mode) == mode)
715                                         keep_looping = nfscl_checkown(owp, op, openown,
716                                             lockown, lpp, &rop, &rop2);
717                                 if (!keep_looping)
718                                         break;
719                         }
720                         if (!keep_looping)
721                                 break;
722                 }
723         } else {
724                 /* Search for matching opens on the hash list. */
725                 oph = &ohashp[NFSCLOPENHASHFUNC(nfhp, fhlen)];
726                 LIST_FOREACH(op, oph, nfso_hash) {
727                         if (op->nfso_fhlen == fhlen &&
728                             !NFSBCMP(op->nfso_fh, nfhp, fhlen)
729                             && (op->nfso_mode & mode) == mode)
730                                 keep_looping = nfscl_checkown(op->nfso_own, op,
731                                     openown, lockown, lpp, &rop, &rop2);
732                         if (!keep_looping) {
733                                 /* LRU order the hash list. */
734                                 LIST_REMOVE(op, nfso_hash);
735                                 LIST_INSERT_HEAD(oph, op, nfso_hash);
736                                 break;
737                         }
738                 }
739         }
740         if (rop == NULL)
741                 rop = rop2;
742         if (rop == NULL)
743                 return (EBADF);
744         *opp = rop;
745         return (0);
746 }
747
748 /* Check for an owner match. */
749 static bool
750 nfscl_checkown(struct nfsclowner *owp, struct nfsclopen *op, uint8_t *openown,
751     uint8_t *lockown, struct nfscllockowner **lpp, struct nfsclopen **ropp,
752     struct nfsclopen **ropp2)
753 {
754         struct nfscllockowner *lp;
755         bool keep_looping;
756
757         keep_looping = true;
758         if (lpp != NULL) {
759                 /* Now look for a matching lockowner. */
760                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
761                         if (!NFSBCMP(lp->nfsl_owner, lockown,
762                             NFSV4CL_LOCKNAMELEN)) {
763                                 *lpp = lp;
764                                 *ropp = op;
765                                 return (false);
766                         }
767                 }
768         }
769         if (*ropp == NULL && !NFSBCMP(owp->nfsow_owner, openown,
770             NFSV4CL_LOCKNAMELEN)) {
771                 *ropp = op;
772                 if (lpp == NULL)
773                         keep_looping = false;
774         }
775         if (*ropp2 == NULL)
776                 *ropp2 = op;
777         return (keep_looping);
778 }
779
780 /*
781  * Release use of an open owner. Called when open operations are done
782  * with the open owner.
783  */
784 void
785 nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp,
786     __unused int error, __unused int candelete, int unlocked)
787 {
788
789         if (owp == NULL)
790                 return;
791         NFSLOCKCLSTATE();
792         if (unlocked == 0) {
793                 if (NFSHASONEOPENOWN(nmp))
794                         nfsv4_relref(&owp->nfsow_rwlock);
795                 else
796                         nfscl_lockunlock(&owp->nfsow_rwlock);
797         }
798         nfscl_clrelease(owp->nfsow_clp);
799         NFSUNLOCKCLSTATE();
800 }
801
802 /*
803  * Release use of an open structure under an open owner.
804  */
805 void
806 nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
807     int candelete)
808 {
809         struct nfsclclient *clp;
810         struct nfsclowner *owp;
811
812         if (op == NULL)
813                 return;
814         NFSLOCKCLSTATE();
815         owp = op->nfso_own;
816         if (NFSHASONEOPENOWN(nmp))
817                 nfsv4_relref(&owp->nfsow_rwlock);
818         else
819                 nfscl_lockunlock(&owp->nfsow_rwlock);
820         clp = owp->nfsow_clp;
821         if (error && candelete && op->nfso_opencnt == 0)
822                 nfscl_freeopen(op, 0);
823         nfscl_clrelease(clp);
824         NFSUNLOCKCLSTATE();
825 }
826
827 /*
828  * Called to get a clientid structure. It will optionally lock the
829  * client data structures to do the SetClientId/SetClientId_confirm,
830  * but will release that lock and return the clientid with a reference
831  * count on it.
832  * If the "cred" argument is NULL, a new clientid should not be created.
833  * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot
834  * be done.
835  * It always clpp with a reference count on it, unless returning an error.
836  */
837 int
838 nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
839     bool tryminvers, struct nfsclclient **clpp)
840 {
841         struct nfsclclient *clp;
842         struct nfsclclient *newclp = NULL;
843         struct nfsmount *nmp;
844         char uuid[HOSTUUIDLEN];
845         int igotlock = 0, error, trystalecnt, clidinusedelay, i;
846         u_int16_t idlen = 0;
847
848         nmp = VFSTONFS(mp);
849         if (cred != NULL) {
850                 getcredhostuuid(cred, uuid, sizeof uuid);
851                 idlen = strlen(uuid);
852                 if (idlen > 0)
853                         idlen += sizeof (u_int64_t);
854                 else
855                         idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */
856                 newclp = malloc(
857                     sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT,
858                     M_WAITOK | M_ZERO);
859         }
860         NFSLOCKCLSTATE();
861         /*
862          * If a forced dismount is already in progress, don't
863          * allocate a new clientid and get out now. For the case where
864          * clp != NULL, this is a harmless optimization.
865          */
866         if (NFSCL_FORCEDISM(mp)) {
867                 NFSUNLOCKCLSTATE();
868                 if (newclp != NULL)
869                         free(newclp, M_NFSCLCLIENT);
870                 return (EBADF);
871         }
872         clp = nmp->nm_clp;
873         if (clp == NULL) {
874                 if (newclp == NULL) {
875                         NFSUNLOCKCLSTATE();
876                         return (EACCES);
877                 }
878                 clp = newclp;
879                 clp->nfsc_idlen = idlen;
880                 LIST_INIT(&clp->nfsc_owner);
881                 TAILQ_INIT(&clp->nfsc_deleg);
882                 TAILQ_INIT(&clp->nfsc_layout);
883                 LIST_INIT(&clp->nfsc_devinfo);
884                 for (i = 0; i < NFSCLDELEGHASHSIZE; i++)
885                         LIST_INIT(&clp->nfsc_deleghash[i]);
886                 for (i = 0; i < NFSCLOPENHASHSIZE; i++)
887                         LIST_INIT(&clp->nfsc_openhash[i]);
888                 for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
889                         LIST_INIT(&clp->nfsc_layouthash[i]);
890                 clp->nfsc_flags = NFSCLFLAGS_INITED;
891                 clp->nfsc_clientidrev = 1;
892                 clp->nfsc_cbident = nfscl_nextcbident();
893                 nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id,
894                     clp->nfsc_idlen);
895                 LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list);
896                 nmp->nm_clp = clp;
897                 clp->nfsc_nmp = nmp;
898         } else {
899                 if (newclp != NULL)
900                         free(newclp, M_NFSCLCLIENT);
901         }
902         while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock &&
903             !NFSCL_FORCEDISM(mp))
904                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
905                     NFSCLSTATEMUTEXPTR, mp);
906         if (igotlock == 0) {
907                 /*
908                  * Call nfsv4_lock() with "iwantlock == 0" so that it will
909                  * wait for a pending exclusive lock request.  This gives the
910                  * exclusive lock request priority over this shared lock
911                  * request.
912                  * An exclusive lock on nfsc_lock is used mainly for server
913                  * crash recoveries.
914                  */
915                 nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR, mp);
916                 nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
917         }
918         if (igotlock == 0 && NFSCL_FORCEDISM(mp)) {
919                 /*
920                  * Both nfsv4_lock() and nfsv4_getref() know to check
921                  * for NFSCL_FORCEDISM() and return without sleeping to
922                  * wait for the exclusive lock to be released, since it
923                  * might be held by nfscl_umount() and we need to get out
924                  * now for that case and not wait until nfscl_umount()
925                  * releases it.
926                  */
927                 NFSUNLOCKCLSTATE();
928                 return (EBADF);
929         }
930         NFSUNLOCKCLSTATE();
931
932         /*
933          * If it needs a clientid, do the setclientid now.
934          */
935         if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) {
936                 if (!igotlock)
937                         panic("nfscl_clget");
938                 if (p == NULL || cred == NULL) {
939                         NFSLOCKCLSTATE();
940                         nfsv4_unlock(&clp->nfsc_lock, 0);
941                         NFSUNLOCKCLSTATE();
942                         return (EACCES);
943                 }
944                 /*
945                  * If RFC3530 Sec. 14.2.33 is taken literally,
946                  * NFSERR_CLIDINUSE will be returned persistently for the
947                  * case where a new mount of the same file system is using
948                  * a different principal. In practice, NFSERR_CLIDINUSE is
949                  * only returned when there is outstanding unexpired state
950                  * on the clientid. As such, try for twice the lease
951                  * interval, if we know what that is. Otherwise, make a
952                  * wild ass guess.
953                  * The case of returning NFSERR_STALECLIENTID is far less
954                  * likely, but might occur if there is a significant delay
955                  * between doing the SetClientID and SetClientIDConfirm Ops,
956                  * such that the server throws away the clientid before
957                  * receiving the SetClientIDConfirm.
958                  */
959                 if (clp->nfsc_renew > 0)
960                         clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2;
961                 else
962                         clidinusedelay = 120;
963                 trystalecnt = 3;
964                 do {
965                         error = nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
966                         if (error == NFSERR_STALECLIENTID ||
967                             error == NFSERR_STALEDONTRECOVER ||
968                             error == NFSERR_BADSESSION ||
969                             error == NFSERR_CLIDINUSE) {
970                                 (void) nfs_catnap(PZERO, error, "nfs_setcl");
971                         } else if (error == NFSERR_MINORVERMISMATCH &&
972                             tryminvers) {
973                                 if (nmp->nm_minorvers > 0)
974                                         nmp->nm_minorvers--;
975                                 else
976                                         tryminvers = false;
977                         }
978                 } while (((error == NFSERR_STALECLIENTID ||
979                      error == NFSERR_BADSESSION ||
980                      error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) ||
981                     (error == NFSERR_CLIDINUSE && --clidinusedelay > 0) ||
982                     (error == NFSERR_MINORVERMISMATCH && tryminvers));
983                 if (error) {
984                         NFSLOCKCLSTATE();
985                         nfsv4_unlock(&clp->nfsc_lock, 0);
986                         NFSUNLOCKCLSTATE();
987                         return (error);
988                 }
989                 clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
990         }
991         if (igotlock) {
992                 NFSLOCKCLSTATE();
993                 nfsv4_unlock(&clp->nfsc_lock, 1);
994                 NFSUNLOCKCLSTATE();
995         }
996
997         *clpp = clp;
998         return (0);
999 }
1000
1001 /*
1002  * Get a reference to a clientid and return it, if valid.
1003  */
1004 struct nfsclclient *
1005 nfscl_findcl(struct nfsmount *nmp)
1006 {
1007         struct nfsclclient *clp;
1008
1009         clp = nmp->nm_clp;
1010         if (clp == NULL || !(clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID))
1011                 return (NULL);
1012         return (clp);
1013 }
1014
1015 /*
1016  * Release the clientid structure. It may be locked or reference counted.
1017  */
1018 static void
1019 nfscl_clrelease(struct nfsclclient *clp)
1020 {
1021
1022         if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
1023                 nfsv4_unlock(&clp->nfsc_lock, 0);
1024         else
1025                 nfsv4_relref(&clp->nfsc_lock);
1026 }
1027
1028 /*
1029  * External call for nfscl_clrelease.
1030  */
1031 void
1032 nfscl_clientrelease(struct nfsclclient *clp)
1033 {
1034
1035         NFSLOCKCLSTATE();
1036         if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
1037                 nfsv4_unlock(&clp->nfsc_lock, 0);
1038         else
1039                 nfsv4_relref(&clp->nfsc_lock);
1040         NFSUNLOCKCLSTATE();
1041 }
1042
1043 /*
1044  * Called when wanting to lock a byte region.
1045  */
1046 int
1047 nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
1048     short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp,
1049     int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp,
1050     struct nfscllockowner **lpp, int *newonep, int *donelocallyp)
1051 {
1052         struct nfscllockowner *lp;
1053         struct nfsclopen *op;
1054         struct nfsclclient *clp;
1055         struct nfscllockowner *nlp;
1056         struct nfscllock *nlop, *otherlop;
1057         struct nfscldeleg *dp = NULL, *ldp = NULL;
1058         struct nfscllockownerhead *lhp = NULL;
1059         struct nfsnode *np;
1060         u_int8_t own[NFSV4CL_LOCKNAMELEN], *ownp, openown[NFSV4CL_LOCKNAMELEN];
1061         u_int8_t *openownp;
1062         int error = 0, ret, donelocally = 0;
1063         u_int32_t mode;
1064
1065         /* For Lock Ops, the open mode doesn't matter, so use 0 to match any. */
1066         mode = 0;
1067         np = VTONFS(vp);
1068         *lpp = NULL;
1069         lp = NULL;
1070         *newonep = 0;
1071         *donelocallyp = 0;
1072
1073         /*
1074          * Might need these, so MALLOC them now, to
1075          * avoid a tsleep() in MALLOC later.
1076          */
1077         nlp = malloc(
1078             sizeof (struct nfscllockowner), M_NFSCLLOCKOWNER, M_WAITOK);
1079         otherlop = malloc(
1080             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1081         nlop = malloc(
1082             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1083         nlop->nfslo_type = type;
1084         nlop->nfslo_first = off;
1085         if (len == NFS64BITSSET) {
1086                 nlop->nfslo_end = NFS64BITSSET;
1087         } else {
1088                 nlop->nfslo_end = off + len;
1089                 if (nlop->nfslo_end <= nlop->nfslo_first)
1090                         error = NFSERR_INVAL;
1091         }
1092
1093         if (!error) {
1094                 if (recovery)
1095                         clp = rclp;
1096                 else
1097                         error = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
1098         }
1099         if (error) {
1100                 free(nlp, M_NFSCLLOCKOWNER);
1101                 free(otherlop, M_NFSCLLOCK);
1102                 free(nlop, M_NFSCLLOCK);
1103                 return (error);
1104         }
1105
1106         op = NULL;
1107         if (recovery) {
1108                 ownp = rownp;
1109                 openownp = ropenownp;
1110         } else {
1111                 nfscl_filllockowner(id, own, flags);
1112                 ownp = own;
1113                 if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
1114                         nfscl_filllockowner(NULL, openown, F_POSIX);
1115                 else
1116                         nfscl_filllockowner(p->td_proc, openown, F_POSIX);
1117                 openownp = openown;
1118         }
1119         if (!recovery) {
1120                 NFSLOCKCLSTATE();
1121                 /*
1122                  * First, search for a delegation. If one exists for this file,
1123                  * the lock can be done locally against it, so long as there
1124                  * isn't a local lock conflict.
1125                  */
1126                 ldp = dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1127                     np->n_fhp->nfh_len);
1128                 /* Just sanity check for correct type of delegation */
1129                 if (dp != NULL && ((dp->nfsdl_flags &
1130                     (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) != 0 ||
1131                      (type == F_WRLCK &&
1132                       (dp->nfsdl_flags & NFSCLDL_WRITE) == 0)))
1133                         dp = NULL;
1134         }
1135         if (dp != NULL) {
1136                 /* Now, find an open and maybe a lockowner. */
1137                 ret = nfscl_getopen(&dp->nfsdl_owner, NULL, np->n_fhp->nfh_fh,
1138                     np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op);
1139                 if (ret)
1140                         ret = nfscl_getopen(NULL, clp->nfsc_openhash,
1141                             np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1142                             ownp, mode, NULL, &op);
1143                 if (!ret) {
1144                         lhp = &dp->nfsdl_lock;
1145                         TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
1146                         TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
1147                         dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
1148                         donelocally = 1;
1149                 } else {
1150                         dp = NULL;
1151                 }
1152         }
1153         if (!donelocally) {
1154                 /*
1155                  * Get the related Open and maybe lockowner.
1156                  */
1157                 error = nfscl_getopen(NULL, clp->nfsc_openhash,
1158                     np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1159                     ownp, mode, &lp, &op);
1160                 if (!error)
1161                         lhp = &op->nfso_lock;
1162         }
1163         if (!error && !recovery)
1164                 error = nfscl_localconflict(clp, np->n_fhp->nfh_fh,
1165                     np->n_fhp->nfh_len, nlop, ownp, ldp, NULL);
1166         if (error) {
1167                 if (!recovery) {
1168                         nfscl_clrelease(clp);
1169                         NFSUNLOCKCLSTATE();
1170                 }
1171                 free(nlp, M_NFSCLLOCKOWNER);
1172                 free(otherlop, M_NFSCLLOCK);
1173                 free(nlop, M_NFSCLLOCK);
1174                 return (error);
1175         }
1176
1177         /*
1178          * Ok, see if a lockowner exists and create one, as required.
1179          */
1180         if (lp == NULL)
1181                 LIST_FOREACH(lp, lhp, nfsl_list) {
1182                         if (!NFSBCMP(lp->nfsl_owner, ownp, NFSV4CL_LOCKNAMELEN))
1183                                 break;
1184                 }
1185         if (lp == NULL) {
1186                 NFSBCOPY(ownp, nlp->nfsl_owner, NFSV4CL_LOCKNAMELEN);
1187                 if (recovery)
1188                         NFSBCOPY(ropenownp, nlp->nfsl_openowner,
1189                             NFSV4CL_LOCKNAMELEN);
1190                 else
1191                         NFSBCOPY(op->nfso_own->nfsow_owner, nlp->nfsl_openowner,
1192                             NFSV4CL_LOCKNAMELEN);
1193                 nlp->nfsl_seqid = 0;
1194                 nlp->nfsl_lockflags = flags;
1195                 nlp->nfsl_inprog = NULL;
1196                 nfscl_lockinit(&nlp->nfsl_rwlock);
1197                 LIST_INIT(&nlp->nfsl_lock);
1198                 if (donelocally) {
1199                         nlp->nfsl_open = NULL;
1200                         nfsstatsv1.cllocallockowners++;
1201                 } else {
1202                         nlp->nfsl_open = op;
1203                         nfsstatsv1.cllockowners++;
1204                 }
1205                 LIST_INSERT_HEAD(lhp, nlp, nfsl_list);
1206                 lp = nlp;
1207                 nlp = NULL;
1208                 *newonep = 1;
1209         }
1210
1211         /*
1212          * Now, update the byte ranges for locks.
1213          */
1214         ret = nfscl_updatelock(lp, &nlop, &otherlop, donelocally);
1215         if (!ret)
1216                 donelocally = 1;
1217         if (donelocally) {
1218                 *donelocallyp = 1;
1219                 if (!recovery)
1220                         nfscl_clrelease(clp);
1221         } else {
1222                 /*
1223                  * Serial modifications on the lock owner for multiple threads
1224                  * for the same process using a read/write lock.
1225                  */
1226                 if (!recovery)
1227                         nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1228         }
1229         if (!recovery)
1230                 NFSUNLOCKCLSTATE();
1231
1232         if (nlp)
1233                 free(nlp, M_NFSCLLOCKOWNER);
1234         if (nlop)
1235                 free(nlop, M_NFSCLLOCK);
1236         if (otherlop)
1237                 free(otherlop, M_NFSCLLOCK);
1238
1239         *lpp = lp;
1240         return (0);
1241 }
1242
1243 /*
1244  * Called to unlock a byte range, for LockU.
1245  */
1246 int
1247 nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
1248     __unused struct ucred *cred, NFSPROC_T *p, int callcnt,
1249     struct nfsclclient *clp, void *id, int flags,
1250     struct nfscllockowner **lpp, int *dorpcp)
1251 {
1252         struct nfscllockowner *lp;
1253         struct nfsclopen *op;
1254         struct nfscllock *nlop, *other_lop = NULL;
1255         struct nfscldeleg *dp;
1256         struct nfsnode *np;
1257         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1258         int ret = 0, fnd;
1259
1260         np = VTONFS(vp);
1261         *lpp = NULL;
1262         *dorpcp = 0;
1263
1264         /*
1265          * Might need these, so MALLOC them now, to
1266          * avoid a tsleep() in MALLOC later.
1267          */
1268         nlop = malloc(
1269             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1270         nlop->nfslo_type = F_UNLCK;
1271         nlop->nfslo_first = off;
1272         if (len == NFS64BITSSET) {
1273                 nlop->nfslo_end = NFS64BITSSET;
1274         } else {
1275                 nlop->nfslo_end = off + len;
1276                 if (nlop->nfslo_end <= nlop->nfslo_first) {
1277                         free(nlop, M_NFSCLLOCK);
1278                         return (NFSERR_INVAL);
1279                 }
1280         }
1281         if (callcnt == 0) {
1282                 other_lop = malloc(
1283                     sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1284                 *other_lop = *nlop;
1285         }
1286         nfscl_filllockowner(id, own, flags);
1287         dp = NULL;
1288         NFSLOCKCLSTATE();
1289         if (callcnt == 0)
1290                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1291                     np->n_fhp->nfh_len);
1292
1293         /*
1294          * First, unlock any local regions on a delegation.
1295          */
1296         if (dp != NULL) {
1297                 /* Look for this lockowner. */
1298                 LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1299                         if (!NFSBCMP(lp->nfsl_owner, own,
1300                             NFSV4CL_LOCKNAMELEN))
1301                                 break;
1302                 }
1303                 if (lp != NULL)
1304                         /* Use other_lop, so nlop is still available */
1305                         (void)nfscl_updatelock(lp, &other_lop, NULL, 1);
1306         }
1307
1308         /*
1309          * Now, find a matching open/lockowner that hasn't already been done,
1310          * as marked by nfsl_inprog.
1311          */
1312         lp = NULL;
1313         fnd = 0;
1314         LIST_FOREACH(op, NFSCLOPENHASH(clp, np->n_fhp->nfh_fh,
1315             np->n_fhp->nfh_len), nfso_hash) {
1316                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1317                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1318                         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1319                                 if (lp->nfsl_inprog == NULL &&
1320                                     !NFSBCMP(lp->nfsl_owner, own,
1321                                      NFSV4CL_LOCKNAMELEN)) {
1322                                         fnd = 1;
1323                                         break;
1324                                 }
1325                         }
1326                 }
1327                 if (fnd)
1328                         break;
1329         }
1330
1331         if (lp != NULL) {
1332                 ret = nfscl_updatelock(lp, &nlop, NULL, 0);
1333                 if (ret)
1334                         *dorpcp = 1;
1335                 /*
1336                  * Serial modifications on the lock owner for multiple
1337                  * threads for the same process using a read/write lock.
1338                  */
1339                 lp->nfsl_inprog = p;
1340                 nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1341                 *lpp = lp;
1342         }
1343         NFSUNLOCKCLSTATE();
1344         if (nlop)
1345                 free(nlop, M_NFSCLLOCK);
1346         if (other_lop)
1347                 free(other_lop, M_NFSCLLOCK);
1348         return (0);
1349 }
1350
1351 /*
1352  * Release all lockowners marked in progess for this process and file.
1353  */
1354 void
1355 nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p,
1356     void *id, int flags)
1357 {
1358         struct nfsclopen *op;
1359         struct nfscllockowner *lp;
1360         struct nfsnode *np;
1361         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1362
1363         np = VTONFS(vp);
1364         nfscl_filllockowner(id, own, flags);
1365         NFSLOCKCLSTATE();
1366         LIST_FOREACH(op, NFSCLOPENHASH(clp, np->n_fhp->nfh_fh,
1367             np->n_fhp->nfh_len), nfso_hash) {
1368                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1369                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1370                         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1371                                 if (lp->nfsl_inprog == p &&
1372                                     !NFSBCMP(lp->nfsl_owner, own,
1373                                     NFSV4CL_LOCKNAMELEN)) {
1374                                         lp->nfsl_inprog = NULL;
1375                                         nfscl_lockunlock(&lp->nfsl_rwlock);
1376                                 }
1377                         }
1378                 }
1379         }
1380         nfscl_clrelease(clp);
1381         NFSUNLOCKCLSTATE();
1382 }
1383
1384 /*
1385  * Called to find out if any bytes within the byte range specified are
1386  * write locked by the calling process. Used to determine if flushing
1387  * is required before a LockU.
1388  * If in doubt, return 1, so the flush will occur.
1389  */
1390 int
1391 nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
1392     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
1393 {
1394         struct nfscllockowner *lp;
1395         struct nfsclopen *op;
1396         struct nfsclclient *clp;
1397         struct nfscllock *lop;
1398         struct nfscldeleg *dp;
1399         struct nfsnode *np;
1400         u_int64_t off, end;
1401         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1402         int error = 0;
1403
1404         np = VTONFS(vp);
1405         switch (fl->l_whence) {
1406         case SEEK_SET:
1407         case SEEK_CUR:
1408                 /*
1409                  * Caller is responsible for adding any necessary offset
1410                  * when SEEK_CUR is used.
1411                  */
1412                 off = fl->l_start;
1413                 break;
1414         case SEEK_END:
1415                 off = np->n_size + fl->l_start;
1416                 break;
1417         default:
1418                 return (1);
1419         }
1420         if (fl->l_len != 0) {
1421                 end = off + fl->l_len;
1422                 if (end < off)
1423                         return (1);
1424         } else {
1425                 end = NFS64BITSSET;
1426         }
1427
1428         error = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
1429         if (error)
1430                 return (1);
1431         nfscl_filllockowner(id, own, flags);
1432         NFSLOCKCLSTATE();
1433
1434         /*
1435          * First check the delegation locks.
1436          */
1437         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
1438         if (dp != NULL) {
1439                 LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1440                         if (!NFSBCMP(lp->nfsl_owner, own,
1441                             NFSV4CL_LOCKNAMELEN))
1442                                 break;
1443                 }
1444                 if (lp != NULL) {
1445                         LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1446                                 if (lop->nfslo_first >= end)
1447                                         break;
1448                                 if (lop->nfslo_end <= off)
1449                                         continue;
1450                                 if (lop->nfslo_type == F_WRLCK) {
1451                                         nfscl_clrelease(clp);
1452                                         NFSUNLOCKCLSTATE();
1453                                         return (1);
1454                                 }
1455                         }
1456                 }
1457         }
1458
1459         /*
1460          * Now, check state against the server.
1461          */
1462         LIST_FOREACH(op, NFSCLOPENHASH(clp, np->n_fhp->nfh_fh,
1463             np->n_fhp->nfh_len), nfso_hash) {
1464                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1465                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1466                         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1467                                 if (!NFSBCMP(lp->nfsl_owner, own,
1468                                     NFSV4CL_LOCKNAMELEN))
1469                                         break;
1470                         }
1471                         if (lp != NULL) {
1472                                 LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1473                                         if (lop->nfslo_first >= end)
1474                                                 break;
1475                                         if (lop->nfslo_end <= off)
1476                                                 continue;
1477                                         if (lop->nfslo_type == F_WRLCK) {
1478                                                 nfscl_clrelease(clp);
1479                                                 NFSUNLOCKCLSTATE();
1480                                                 return (1);
1481                                         }
1482                                 }
1483                         }
1484                 }
1485         }
1486         nfscl_clrelease(clp);
1487         NFSUNLOCKCLSTATE();
1488         return (0);
1489 }
1490
1491 /*
1492  * Release a byte range lock owner structure.
1493  */
1494 void
1495 nfscl_lockrelease(struct nfscllockowner *lp, int error, int candelete)
1496 {
1497         struct nfsclclient *clp;
1498
1499         if (lp == NULL)
1500                 return;
1501         NFSLOCKCLSTATE();
1502         clp = lp->nfsl_open->nfso_own->nfsow_clp;
1503         if (error != 0 && candelete &&
1504             (lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED) == 0)
1505                 nfscl_freelockowner(lp, 0);
1506         else
1507                 nfscl_lockunlock(&lp->nfsl_rwlock);
1508         nfscl_clrelease(clp);
1509         NFSUNLOCKCLSTATE();
1510 }
1511
1512 /*
1513  * Free up an open structure and any associated byte range lock structures.
1514  */
1515 void
1516 nfscl_freeopen(struct nfsclopen *op, int local)
1517 {
1518
1519         LIST_REMOVE(op, nfso_list);
1520         if (op->nfso_hash.le_prev != NULL)
1521                 LIST_REMOVE(op, nfso_hash);
1522         nfscl_freealllocks(&op->nfso_lock, local);
1523         free(op, M_NFSCLOPEN);
1524         if (local)
1525                 nfsstatsv1.cllocalopens--;
1526         else
1527                 nfsstatsv1.clopens--;
1528 }
1529
1530 /*
1531  * Free up all lock owners and associated locks.
1532  */
1533 static void
1534 nfscl_freealllocks(struct nfscllockownerhead *lhp, int local)
1535 {
1536         struct nfscllockowner *lp, *nlp;
1537
1538         LIST_FOREACH_SAFE(lp, lhp, nfsl_list, nlp) {
1539                 if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1540                         panic("nfscllckw");
1541                 nfscl_freelockowner(lp, local);
1542         }
1543 }
1544
1545 /*
1546  * Called for an Open when NFSERR_EXPIRED is received from the server.
1547  * If there are no byte range locks nor a Share Deny lost, try to do a
1548  * fresh Open. Otherwise, free the open.
1549  */
1550 static int
1551 nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op,
1552     struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
1553 {
1554         struct nfscllockowner *lp;
1555         struct nfscldeleg *dp;
1556         int mustdelete = 0, error;
1557
1558         /*
1559          * Look for any byte range lock(s).
1560          */
1561         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1562                 if (!LIST_EMPTY(&lp->nfsl_lock)) {
1563                         mustdelete = 1;
1564                         break;
1565                 }
1566         }
1567
1568         /*
1569          * If no byte range lock(s) nor a Share deny, try to re-open.
1570          */
1571         if (!mustdelete && (op->nfso_mode & NFSLCK_DENYBITS) == 0) {
1572                 newnfs_copycred(&op->nfso_cred, cred);
1573                 dp = NULL;
1574                 error = nfsrpc_reopen(nmp, op->nfso_fh,
1575                     op->nfso_fhlen, op->nfso_mode, op, &dp, cred, p);
1576                 if (error) {
1577                         mustdelete = 1;
1578                         if (dp != NULL) {
1579                                 free(dp, M_NFSCLDELEG);
1580                                 dp = NULL;
1581                         }
1582                 }
1583                 if (dp != NULL)
1584                         nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh,
1585                             op->nfso_fhlen, cred, p, &dp);
1586         }
1587
1588         /*
1589          * If a byte range lock or Share deny or couldn't re-open, free it.
1590          */
1591         if (mustdelete)
1592                 nfscl_freeopen(op, 0);
1593         return (mustdelete);
1594 }
1595
1596 /*
1597  * Free up an open owner structure.
1598  */
1599 static void
1600 nfscl_freeopenowner(struct nfsclowner *owp, int local)
1601 {
1602
1603         LIST_REMOVE(owp, nfsow_list);
1604         free(owp, M_NFSCLOWNER);
1605         if (local)
1606                 nfsstatsv1.cllocalopenowners--;
1607         else
1608                 nfsstatsv1.clopenowners--;
1609 }
1610
1611 /*
1612  * Free up a byte range lock owner structure.
1613  */
1614 void
1615 nfscl_freelockowner(struct nfscllockowner *lp, int local)
1616 {
1617         struct nfscllock *lop, *nlop;
1618
1619         LIST_REMOVE(lp, nfsl_list);
1620         LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
1621                 nfscl_freelock(lop, local);
1622         }
1623         free(lp, M_NFSCLLOCKOWNER);
1624         if (local)
1625                 nfsstatsv1.cllocallockowners--;
1626         else
1627                 nfsstatsv1.cllockowners--;
1628 }
1629
1630 /*
1631  * Free up a byte range lock structure.
1632  */
1633 void
1634 nfscl_freelock(struct nfscllock *lop, int local)
1635 {
1636
1637         LIST_REMOVE(lop, nfslo_list);
1638         free(lop, M_NFSCLLOCK);
1639         if (local)
1640                 nfsstatsv1.cllocallocks--;
1641         else
1642                 nfsstatsv1.cllocks--;
1643 }
1644
1645 /*
1646  * Clean out the state related to a delegation.
1647  */
1648 static void
1649 nfscl_cleandeleg(struct nfscldeleg *dp)
1650 {
1651         struct nfsclowner *owp, *nowp;
1652         struct nfsclopen *op;
1653
1654         LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
1655                 op = LIST_FIRST(&owp->nfsow_open);
1656                 if (op != NULL) {
1657                         if (LIST_NEXT(op, nfso_list) != NULL)
1658                                 panic("nfscleandel");
1659                         nfscl_freeopen(op, 1);
1660                 }
1661                 nfscl_freeopenowner(owp, 1);
1662         }
1663         nfscl_freealllocks(&dp->nfsdl_lock, 1);
1664 }
1665
1666 /*
1667  * Free a delegation.
1668  */
1669 static void
1670 nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp, bool freeit)
1671 {
1672
1673         TAILQ_REMOVE(hdp, dp, nfsdl_list);
1674         LIST_REMOVE(dp, nfsdl_hash);
1675         if (freeit)
1676                 free(dp, M_NFSCLDELEG);
1677         nfsstatsv1.cldelegates--;
1678         nfscl_delegcnt--;
1679 }
1680
1681 /*
1682  * Free up all state related to this client structure.
1683  */
1684 static void
1685 nfscl_cleanclient(struct nfsclclient *clp)
1686 {
1687         struct nfsclowner *owp, *nowp;
1688         struct nfsclopen *op, *nop;
1689         struct nfscllayout *lyp, *nlyp;
1690         struct nfscldevinfo *dip, *ndip;
1691
1692         TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
1693                 nfscl_freelayout(lyp);
1694
1695         LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip)
1696                 nfscl_freedevinfo(dip);
1697
1698         /* Now, all the OpenOwners, etc. */
1699         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1700                 LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1701                         nfscl_freeopen(op, 0);
1702                 }
1703                 nfscl_freeopenowner(owp, 0);
1704         }
1705 }
1706
1707 /*
1708  * Called when an NFSERR_EXPIRED is received from the server.
1709  */
1710 static void
1711 nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp,
1712     struct ucred *cred, NFSPROC_T *p)
1713 {
1714         struct nfsclowner *owp, *nowp, *towp;
1715         struct nfsclopen *op, *nop, *top;
1716         struct nfscldeleg *dp, *ndp;
1717         int ret, printed = 0;
1718
1719         /*
1720          * First, merge locally issued Opens into the list for the server.
1721          */
1722         dp = TAILQ_FIRST(&clp->nfsc_deleg);
1723         while (dp != NULL) {
1724             ndp = TAILQ_NEXT(dp, nfsdl_list);
1725             owp = LIST_FIRST(&dp->nfsdl_owner);
1726             while (owp != NULL) {
1727                 nowp = LIST_NEXT(owp, nfsow_list);
1728                 op = LIST_FIRST(&owp->nfsow_open);
1729                 if (op != NULL) {
1730                     if (LIST_NEXT(op, nfso_list) != NULL)
1731                         panic("nfsclexp");
1732                     LIST_FOREACH(towp, &clp->nfsc_owner, nfsow_list) {
1733                         if (!NFSBCMP(towp->nfsow_owner, owp->nfsow_owner,
1734                             NFSV4CL_LOCKNAMELEN))
1735                             break;
1736                     }
1737                     if (towp != NULL) {
1738                         /* Merge opens in */
1739                         LIST_FOREACH(top, &towp->nfsow_open, nfso_list) {
1740                             if (top->nfso_fhlen == op->nfso_fhlen &&
1741                                 !NFSBCMP(top->nfso_fh, op->nfso_fh,
1742                                  op->nfso_fhlen)) {
1743                                 top->nfso_mode |= op->nfso_mode;
1744                                 top->nfso_opencnt += op->nfso_opencnt;
1745                                 break;
1746                             }
1747                         }
1748                         if (top == NULL) {
1749                             /* Just add the open to the owner list */
1750                             LIST_REMOVE(op, nfso_list);
1751                             op->nfso_own = towp;
1752                             LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list);
1753                             LIST_INSERT_HEAD(NFSCLOPENHASH(clp, op->nfso_fh,
1754                                 op->nfso_fhlen), op, nfso_hash);
1755                             nfsstatsv1.cllocalopens--;
1756                             nfsstatsv1.clopens++;
1757                         }
1758                     } else {
1759                         /* Just add the openowner to the client list */
1760                         LIST_REMOVE(owp, nfsow_list);
1761                         owp->nfsow_clp = clp;
1762                         LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list);
1763                         LIST_INSERT_HEAD(NFSCLOPENHASH(clp, op->nfso_fh,
1764                             op->nfso_fhlen), op, nfso_hash);
1765                         nfsstatsv1.cllocalopenowners--;
1766                         nfsstatsv1.clopenowners++;
1767                         nfsstatsv1.cllocalopens--;
1768                         nfsstatsv1.clopens++;
1769                     }
1770                 }
1771                 owp = nowp;
1772             }
1773             if (!printed && !LIST_EMPTY(&dp->nfsdl_lock)) {
1774                 printed = 1;
1775                 printf("nfsv4 expired locks lost\n");
1776             }
1777             nfscl_cleandeleg(dp);
1778             nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
1779             dp = ndp;
1780         }
1781         if (!TAILQ_EMPTY(&clp->nfsc_deleg))
1782             panic("nfsclexp");
1783
1784         /*
1785          * Now, try and reopen against the server.
1786          */
1787         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1788                 owp->nfsow_seqid = 0;
1789                 LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1790                         ret = nfscl_expireopen(clp, op, nmp, cred, p);
1791                         if (ret && !printed) {
1792                                 printed = 1;
1793                                 printf("nfsv4 expired locks lost\n");
1794                         }
1795                 }
1796                 if (LIST_EMPTY(&owp->nfsow_open))
1797                         nfscl_freeopenowner(owp, 0);
1798         }
1799 }
1800
1801 /*
1802  * This function must be called after the process represented by "own" has
1803  * exited. Must be called with CLSTATE lock held.
1804  */
1805 static void
1806 nfscl_cleanup_common(struct nfsclclient *clp, u_int8_t *own)
1807 {
1808         struct nfsclowner *owp, *nowp;
1809         struct nfscllockowner *lp, *nlp;
1810         struct nfscldeleg *dp;
1811
1812         /* First, get rid of local locks on delegations. */
1813         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1814                 LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1815                     if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
1816                         if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1817                             panic("nfscllckw");
1818                         nfscl_freelockowner(lp, 1);
1819                     }
1820                 }
1821         }
1822         owp = LIST_FIRST(&clp->nfsc_owner);
1823         while (owp != NULL) {
1824                 nowp = LIST_NEXT(owp, nfsow_list);
1825                 if (!NFSBCMP(owp->nfsow_owner, own,
1826                     NFSV4CL_LOCKNAMELEN)) {
1827                         /*
1828                          * If there are children that haven't closed the
1829                          * file descriptors yet, the opens will still be
1830                          * here. For that case, let the renew thread clear
1831                          * out the OpenOwner later.
1832                          */
1833                         if (LIST_EMPTY(&owp->nfsow_open))
1834                                 nfscl_freeopenowner(owp, 0);
1835                         else
1836                                 owp->nfsow_defunct = 1;
1837                 }
1838                 owp = nowp;
1839         }
1840 }
1841
1842 /*
1843  * Find open/lock owners for processes that have exited.
1844  */
1845 static void
1846 nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp)
1847 {
1848         struct nfsclowner *owp, *nowp;
1849         struct nfsclopen *op;
1850         struct nfscllockowner *lp, *nlp;
1851         struct nfscldeleg *dp;
1852
1853         /*
1854          * All the pidhash locks must be acquired, since they are sx locks
1855          * and must be acquired before the mutexes.  The pid(s) that will
1856          * be used aren't known yet, so all the locks need to be acquired.
1857          * Fortunately, this function is only performed once/sec.
1858          */
1859         pidhash_slockall();
1860         NFSLOCKCLSTATE();
1861         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1862                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1863                         LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) {
1864                                 if (LIST_EMPTY(&lp->nfsl_lock))
1865                                         nfscl_emptylockowner(lp, lhp);
1866                         }
1867                 }
1868                 if (nfscl_procdoesntexist(owp->nfsow_owner))
1869                         nfscl_cleanup_common(clp, owp->nfsow_owner);
1870         }
1871
1872         /*
1873          * For the single open_owner case, these lock owners need to be
1874          * checked to see if they still exist separately.
1875          * This is because nfscl_procdoesntexist() never returns true for
1876          * the single open_owner so that the above doesn't ever call
1877          * nfscl_cleanup_common().
1878          */
1879         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1880                 LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1881                         if (nfscl_procdoesntexist(lp->nfsl_owner))
1882                                 nfscl_cleanup_common(clp, lp->nfsl_owner);
1883                 }
1884         }
1885         NFSUNLOCKCLSTATE();
1886         pidhash_sunlockall();
1887 }
1888
1889 /*
1890  * Take the empty lock owner and move it to the local lhp list if the
1891  * associated process no longer exists.
1892  */
1893 static void
1894 nfscl_emptylockowner(struct nfscllockowner *lp,
1895     struct nfscllockownerfhhead *lhp)
1896 {
1897         struct nfscllockownerfh *lfhp, *mylfhp;
1898         struct nfscllockowner *nlp;
1899         int fnd_it;
1900
1901         /* If not a Posix lock owner, just return. */
1902         if ((lp->nfsl_lockflags & F_POSIX) == 0)
1903                 return;
1904
1905         fnd_it = 0;
1906         mylfhp = NULL;
1907         /*
1908          * First, search to see if this lock owner is already in the list.
1909          * If it is, then the associated process no longer exists.
1910          */
1911         SLIST_FOREACH(lfhp, lhp, nfslfh_list) {
1912                 if (lfhp->nfslfh_len == lp->nfsl_open->nfso_fhlen &&
1913                     !NFSBCMP(lfhp->nfslfh_fh, lp->nfsl_open->nfso_fh,
1914                     lfhp->nfslfh_len))
1915                         mylfhp = lfhp;
1916                 LIST_FOREACH(nlp, &lfhp->nfslfh_lock, nfsl_list)
1917                         if (!NFSBCMP(nlp->nfsl_owner, lp->nfsl_owner,
1918                             NFSV4CL_LOCKNAMELEN))
1919                                 fnd_it = 1;
1920         }
1921         /* If not found, check if process still exists. */
1922         if (fnd_it == 0 && nfscl_procdoesntexist(lp->nfsl_owner) == 0)
1923                 return;
1924
1925         /* Move the lock owner over to the local list. */
1926         if (mylfhp == NULL) {
1927                 mylfhp = malloc(sizeof(struct nfscllockownerfh), M_TEMP,
1928                     M_NOWAIT);
1929                 if (mylfhp == NULL)
1930                         return;
1931                 mylfhp->nfslfh_len = lp->nfsl_open->nfso_fhlen;
1932                 NFSBCOPY(lp->nfsl_open->nfso_fh, mylfhp->nfslfh_fh,
1933                     mylfhp->nfslfh_len);
1934                 LIST_INIT(&mylfhp->nfslfh_lock);
1935                 SLIST_INSERT_HEAD(lhp, mylfhp, nfslfh_list);
1936         }
1937         LIST_REMOVE(lp, nfsl_list);
1938         LIST_INSERT_HEAD(&mylfhp->nfslfh_lock, lp, nfsl_list);
1939 }
1940
1941 static int      fake_global;    /* Used to force visibility of MNTK_UNMOUNTF */
1942 /*
1943  * Called from nfs umount to free up the clientid.
1944  */
1945 void
1946 nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p)
1947 {
1948         struct nfsclclient *clp;
1949         struct ucred *cred;
1950         int igotlock;
1951
1952         /*
1953          * For the case that matters, this is the thread that set
1954          * MNTK_UNMOUNTF, so it will see it set. The code that follows is
1955          * done to ensure that any thread executing nfscl_getcl() after
1956          * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the
1957          * mutex for NFSLOCKCLSTATE(), so it is "m" for the following
1958          * explanation, courtesy of Alan Cox.
1959          * What follows is a snippet from Alan Cox's email at:
1960          * https://docs.FreeBSD.org/cgi/mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw
1961          * 
1962          * 1. Set MNTK_UNMOUNTF
1963          * 2. Acquire a standard FreeBSD mutex "m".
1964          * 3. Update some data structures.
1965          * 4. Release mutex "m".
1966          * 
1967          * Then, other threads that acquire "m" after step 4 has occurred will
1968          * see MNTK_UNMOUNTF as set.  But, other threads that beat thread X to
1969          * step 2 may or may not see MNTK_UNMOUNTF as set.
1970          */
1971         NFSLOCKCLSTATE();
1972         if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1973                 fake_global++;
1974                 NFSUNLOCKCLSTATE();
1975                 NFSLOCKCLSTATE();
1976         }
1977
1978         clp = nmp->nm_clp;
1979         if (clp != NULL) {
1980                 if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0)
1981                         panic("nfscl umount");
1982
1983                 /*
1984                  * First, handshake with the nfscl renew thread, to terminate
1985                  * it.
1986                  */
1987                 clp->nfsc_flags |= NFSCLFLAGS_UMOUNT;
1988                 while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD)
1989                         (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT,
1990                             "nfsclumnt", hz);
1991
1992                 /*
1993                  * Now, get the exclusive lock on the client state, so
1994                  * that no uses of the state are still in progress.
1995                  */
1996                 do {
1997                         igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
1998                             NFSCLSTATEMUTEXPTR, NULL);
1999                 } while (!igotlock);
2000                 NFSUNLOCKCLSTATE();
2001
2002                 /*
2003                  * Free up all the state. It will expire on the server, but
2004                  * maybe we should do a SetClientId/SetClientIdConfirm so
2005                  * the server throws it away?
2006                  */
2007                 LIST_REMOVE(clp, nfsc_list);
2008                 nfscl_delegreturnall(clp, p);
2009                 cred = newnfs_getcred();
2010                 if (NFSHASNFSV4N(nmp)) {
2011                         (void)nfsrpc_destroysession(nmp, clp, cred, p);
2012                         (void)nfsrpc_destroyclient(nmp, clp, cred, p);
2013                 } else
2014                         (void)nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
2015                 nfscl_cleanclient(clp);
2016                 nmp->nm_clp = NULL;
2017                 NFSFREECRED(cred);
2018                 free(clp, M_NFSCLCLIENT);
2019         } else
2020                 NFSUNLOCKCLSTATE();
2021 }
2022
2023 /*
2024  * This function is called when a server replies with NFSERR_STALECLIENTID
2025  * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists,
2026  * doing Opens and Locks with reclaim. If these fail, it deletes the
2027  * corresponding state.
2028  */
2029 static void
2030 nfscl_recover(struct nfsclclient *clp, bool *retokp, struct ucred *cred,
2031     NFSPROC_T *p)
2032 {
2033         struct nfsclowner *owp, *nowp;
2034         struct nfsclopen *op, *nop;
2035         struct nfscllockowner *lp, *nlp;
2036         struct nfscllock *lop, *nlop;
2037         struct nfscldeleg *dp, *ndp, *tdp;
2038         struct nfsmount *nmp;
2039         struct ucred *tcred;
2040         struct nfsclopenhead extra_open;
2041         struct nfscldeleghead extra_deleg;
2042         struct nfsreq *rep;
2043         u_int64_t len;
2044         u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode;
2045         int i, igotlock = 0, error, trycnt, firstlock;
2046         struct nfscllayout *lyp, *nlyp;
2047         bool recovered_one;
2048
2049         /*
2050          * First, lock the client structure, so everyone else will
2051          * block when trying to use state.
2052          */
2053         NFSLOCKCLSTATE();
2054         clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2055         do {
2056                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2057                     NFSCLSTATEMUTEXPTR, NULL);
2058         } while (!igotlock);
2059         NFSUNLOCKCLSTATE();
2060
2061         nmp = clp->nfsc_nmp;
2062         if (nmp == NULL)
2063                 panic("nfscl recover");
2064
2065         /*
2066          * For now, just get rid of all layouts. There may be a need
2067          * to do LayoutCommit Ops with reclaim == true later.
2068          */
2069         TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
2070                 nfscl_freelayout(lyp);
2071         TAILQ_INIT(&clp->nfsc_layout);
2072         for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
2073                 LIST_INIT(&clp->nfsc_layouthash[i]);
2074
2075         trycnt = 5;
2076         tcred = NULL;
2077         do {
2078                 error = nfsrpc_setclient(nmp, clp, 1, retokp, cred, p);
2079         } while ((error == NFSERR_STALECLIENTID ||
2080              error == NFSERR_BADSESSION ||
2081              error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2082         if (error) {
2083                 NFSLOCKCLSTATE();
2084                 clp->nfsc_flags &= ~(NFSCLFLAGS_RECOVER |
2085                     NFSCLFLAGS_RECVRINPROG);
2086                 wakeup(&clp->nfsc_flags);
2087                 nfsv4_unlock(&clp->nfsc_lock, 0);
2088                 NFSUNLOCKCLSTATE();
2089                 return;
2090         }
2091         clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2092         clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2093
2094         /*
2095          * Mark requests already queued on the server, so that they don't
2096          * initiate another recovery cycle. Any requests already in the
2097          * queue that handle state information will have the old stale
2098          * clientid/stateid and will get a NFSERR_STALESTATEID,
2099          * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server.
2100          * This will be translated to NFSERR_STALEDONTRECOVER when
2101          * R_DONTRECOVER is set.
2102          */
2103         NFSLOCKREQ();
2104         TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) {
2105                 if (rep->r_nmp == nmp)
2106                         rep->r_flags |= R_DONTRECOVER;
2107         }
2108         NFSUNLOCKREQ();
2109
2110         /*
2111          * If nfsrpc_setclient() returns *retokp == true,
2112          * no more recovery is needed.
2113          */
2114         if (*retokp)
2115                 goto out;
2116
2117         /*
2118          * Now, mark all delegations "need reclaim".
2119          */
2120         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list)
2121                 dp->nfsdl_flags |= NFSCLDL_NEEDRECLAIM;
2122
2123         TAILQ_INIT(&extra_deleg);
2124         LIST_INIT(&extra_open);
2125         /*
2126          * Now traverse the state lists, doing Open and Lock Reclaims.
2127          */
2128         tcred = newnfs_getcred();
2129         recovered_one = false;
2130         owp = LIST_FIRST(&clp->nfsc_owner);
2131         while (owp != NULL) {
2132             nowp = LIST_NEXT(owp, nfsow_list);
2133             owp->nfsow_seqid = 0;
2134             op = LIST_FIRST(&owp->nfsow_open);
2135             while (op != NULL) {
2136                 nop = LIST_NEXT(op, nfso_list);
2137                 if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2138                     /* Search for a delegation to reclaim with the open */
2139                     TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2140                         if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2141                             continue;
2142                         if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2143                             mode = NFSV4OPEN_ACCESSWRITE;
2144                             delegtype = NFSV4OPEN_DELEGATEWRITE;
2145                         } else {
2146                             mode = NFSV4OPEN_ACCESSREAD;
2147                             delegtype = NFSV4OPEN_DELEGATEREAD;
2148                         }
2149                         if ((op->nfso_mode & mode) == mode &&
2150                             op->nfso_fhlen == dp->nfsdl_fhlen &&
2151                             !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen))
2152                             break;
2153                     }
2154                     ndp = dp;
2155                     if (dp == NULL)
2156                         delegtype = NFSV4OPEN_DELEGATENONE;
2157                     newnfs_copycred(&op->nfso_cred, tcred);
2158                     error = nfscl_tryopen(nmp, NULL, op->nfso_fh,
2159                         op->nfso_fhlen, op->nfso_fh, op->nfso_fhlen,
2160                         op->nfso_mode, op, NULL, 0, &ndp, 1, delegtype,
2161                         tcred, p);
2162                     if (!error) {
2163                         recovered_one = true;
2164                         /* Handle any replied delegation */
2165                         if (ndp != NULL && ((ndp->nfsdl_flags & NFSCLDL_WRITE)
2166                             || NFSMNT_RDONLY(nmp->nm_mountp))) {
2167                             if ((ndp->nfsdl_flags & NFSCLDL_WRITE))
2168                                 mode = NFSV4OPEN_ACCESSWRITE;
2169                             else
2170                                 mode = NFSV4OPEN_ACCESSREAD;
2171                             TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2172                                 if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2173                                     continue;
2174                                 if ((op->nfso_mode & mode) == mode &&
2175                                     op->nfso_fhlen == dp->nfsdl_fhlen &&
2176                                     !NFSBCMP(op->nfso_fh, dp->nfsdl_fh,
2177                                     op->nfso_fhlen)) {
2178                                     dp->nfsdl_stateid = ndp->nfsdl_stateid;
2179                                     dp->nfsdl_sizelimit = ndp->nfsdl_sizelimit;
2180                                     dp->nfsdl_ace = ndp->nfsdl_ace;
2181                                     dp->nfsdl_change = ndp->nfsdl_change;
2182                                     dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2183                                     if ((ndp->nfsdl_flags & NFSCLDL_RECALL))
2184                                         dp->nfsdl_flags |= NFSCLDL_RECALL;
2185                                     free(ndp, M_NFSCLDELEG);
2186                                     ndp = NULL;
2187                                     break;
2188                                 }
2189                             }
2190                         }
2191                         if (ndp != NULL)
2192                             TAILQ_INSERT_HEAD(&extra_deleg, ndp, nfsdl_list);
2193
2194                         /* and reclaim all byte range locks */
2195                         lp = LIST_FIRST(&op->nfso_lock);
2196                         while (lp != NULL) {
2197                             nlp = LIST_NEXT(lp, nfsl_list);
2198                             lp->nfsl_seqid = 0;
2199                             firstlock = 1;
2200                             lop = LIST_FIRST(&lp->nfsl_lock);
2201                             while (lop != NULL) {
2202                                 nlop = LIST_NEXT(lop, nfslo_list);
2203                                 if (lop->nfslo_end == NFS64BITSSET)
2204                                     len = NFS64BITSSET;
2205                                 else
2206                                     len = lop->nfslo_end - lop->nfslo_first;
2207                                 error = nfscl_trylock(nmp, NULL,
2208                                     op->nfso_fh, op->nfso_fhlen, lp,
2209                                     firstlock, 1, lop->nfslo_first, len,
2210                                     lop->nfslo_type, tcred, p);
2211                                 if (error != 0)
2212                                     nfscl_freelock(lop, 0);
2213                                 else
2214                                     firstlock = 0;
2215                                 lop = nlop;
2216                             }
2217                             /* If no locks, but a lockowner, just delete it. */
2218                             if (LIST_EMPTY(&lp->nfsl_lock))
2219                                 nfscl_freelockowner(lp, 0);
2220                             lp = nlp;
2221                         }
2222                     } else if (error == NFSERR_NOGRACE && !recovered_one &&
2223                         NFSHASNFSV4N(nmp)) {
2224                         /*
2225                          * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
2226                          * actually end up here, since the client will do
2227                          * a recovery for NFSERR_BADSESSION, but will get
2228                          * an NFSERR_NOGRACE reply for the first "reclaim"
2229                          * attempt.
2230                          * So, call nfscl_expireclient() to recover the
2231                          * opens as best we can and then do a reclaim
2232                          * complete and return.
2233                          */
2234                         nfsrpc_reclaimcomplete(nmp, cred, p);
2235                         nfscl_expireclient(clp, nmp, tcred, p);
2236                         goto out;
2237                     }
2238                 }
2239                 if (error != 0 && error != NFSERR_BADSESSION)
2240                     nfscl_freeopen(op, 0);
2241                 op = nop;
2242             }
2243             owp = nowp;
2244         }
2245
2246         /*
2247          * Now, try and get any delegations not yet reclaimed by cobbling
2248          * to-gether an appropriate open.
2249          */
2250         nowp = NULL;
2251         dp = TAILQ_FIRST(&clp->nfsc_deleg);
2252         while (dp != NULL) {
2253             ndp = TAILQ_NEXT(dp, nfsdl_list);
2254             if ((dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) {
2255                 if (nowp == NULL) {
2256                     nowp = malloc(
2257                         sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK);
2258                     /*
2259                      * Name must be as long an largest possible
2260                      * NFSV4CL_LOCKNAMELEN. 12 for now.
2261                      */
2262                     NFSBCOPY("RECLAIMDELEG", nowp->nfsow_owner,
2263                         NFSV4CL_LOCKNAMELEN);
2264                     LIST_INIT(&nowp->nfsow_open);
2265                     nowp->nfsow_clp = clp;
2266                     nowp->nfsow_seqid = 0;
2267                     nowp->nfsow_defunct = 0;
2268                     nfscl_lockinit(&nowp->nfsow_rwlock);
2269                 }
2270                 nop = NULL;
2271                 if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2272                     nop = malloc(sizeof (struct nfsclopen) +
2273                         dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
2274                     nop->nfso_own = nowp;
2275                     if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2276                         nop->nfso_mode = NFSV4OPEN_ACCESSWRITE;
2277                         delegtype = NFSV4OPEN_DELEGATEWRITE;
2278                     } else {
2279                         nop->nfso_mode = NFSV4OPEN_ACCESSREAD;
2280                         delegtype = NFSV4OPEN_DELEGATEREAD;
2281                     }
2282                     nop->nfso_opencnt = 0;
2283                     nop->nfso_posixlock = 1;
2284                     nop->nfso_fhlen = dp->nfsdl_fhlen;
2285                     NFSBCOPY(dp->nfsdl_fh, nop->nfso_fh, dp->nfsdl_fhlen);
2286                     LIST_INIT(&nop->nfso_lock);
2287                     nop->nfso_stateid.seqid = 0;
2288                     nop->nfso_stateid.other[0] = 0;
2289                     nop->nfso_stateid.other[1] = 0;
2290                     nop->nfso_stateid.other[2] = 0;
2291                     newnfs_copycred(&dp->nfsdl_cred, tcred);
2292                     newnfs_copyincred(tcred, &nop->nfso_cred);
2293                     tdp = NULL;
2294                     error = nfscl_tryopen(nmp, NULL, nop->nfso_fh,
2295                         nop->nfso_fhlen, nop->nfso_fh, nop->nfso_fhlen,
2296                         nop->nfso_mode, nop, NULL, 0, &tdp, 1,
2297                         delegtype, tcred, p);
2298                     if (tdp != NULL) {
2299                         if ((tdp->nfsdl_flags & NFSCLDL_WRITE))
2300                             mode = NFSV4OPEN_ACCESSWRITE;
2301                         else
2302                             mode = NFSV4OPEN_ACCESSREAD;
2303                         if ((nop->nfso_mode & mode) == mode &&
2304                             nop->nfso_fhlen == tdp->nfsdl_fhlen &&
2305                             !NFSBCMP(nop->nfso_fh, tdp->nfsdl_fh,
2306                             nop->nfso_fhlen)) {
2307                             dp->nfsdl_stateid = tdp->nfsdl_stateid;
2308                             dp->nfsdl_sizelimit = tdp->nfsdl_sizelimit;
2309                             dp->nfsdl_ace = tdp->nfsdl_ace;
2310                             dp->nfsdl_change = tdp->nfsdl_change;
2311                             dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2312                             if ((tdp->nfsdl_flags & NFSCLDL_RECALL))
2313                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
2314                             free(tdp, M_NFSCLDELEG);
2315                         } else {
2316                             TAILQ_INSERT_HEAD(&extra_deleg, tdp, nfsdl_list);
2317                         }
2318                     }
2319                 }
2320                 if (error) {
2321                     if (nop != NULL)
2322                         free(nop, M_NFSCLOPEN);
2323                     if (error == NFSERR_NOGRACE && !recovered_one &&
2324                         NFSHASNFSV4N(nmp)) {
2325                         /*
2326                          * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
2327                          * actually end up here, since the client will do
2328                          * a recovery for NFSERR_BADSESSION, but will get
2329                          * an NFSERR_NOGRACE reply for the first "reclaim"
2330                          * attempt.
2331                          * So, call nfscl_expireclient() to recover the
2332                          * opens as best we can and then do a reclaim
2333                          * complete and return.
2334                          */
2335                         nfsrpc_reclaimcomplete(nmp, cred, p);
2336                         nfscl_expireclient(clp, nmp, tcred, p);
2337                         free(nowp, M_NFSCLOWNER);
2338                         goto out;
2339                     }
2340                     /*
2341                      * Couldn't reclaim it, so throw the state
2342                      * away. Ouch!!
2343                      */
2344                     nfscl_cleandeleg(dp);
2345                     nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
2346                 } else {
2347                     recovered_one = true;
2348                     LIST_INSERT_HEAD(&extra_open, nop, nfso_list);
2349                 }
2350             }
2351             dp = ndp;
2352         }
2353
2354         /*
2355          * Now, get rid of extra Opens and Delegations.
2356          */
2357         LIST_FOREACH_SAFE(op, &extra_open, nfso_list, nop) {
2358                 do {
2359                         newnfs_copycred(&op->nfso_cred, tcred);
2360                         error = nfscl_tryclose(op, tcred, nmp, p);
2361                         if (error == NFSERR_GRACE)
2362                                 (void) nfs_catnap(PZERO, error, "nfsexcls");
2363                 } while (error == NFSERR_GRACE);
2364                 LIST_REMOVE(op, nfso_list);
2365                 free(op, M_NFSCLOPEN);
2366         }
2367         if (nowp != NULL)
2368                 free(nowp, M_NFSCLOWNER);
2369
2370         TAILQ_FOREACH_SAFE(dp, &extra_deleg, nfsdl_list, ndp) {
2371                 do {
2372                         newnfs_copycred(&dp->nfsdl_cred, tcred);
2373                         error = nfscl_trydelegreturn(dp, tcred, nmp, p);
2374                         if (error == NFSERR_GRACE)
2375                                 (void) nfs_catnap(PZERO, error, "nfsexdlg");
2376                 } while (error == NFSERR_GRACE);
2377                 TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list);
2378                 free(dp, M_NFSCLDELEG);
2379         }
2380
2381         /* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */
2382         if (NFSHASNFSV4N(nmp))
2383                 (void)nfsrpc_reclaimcomplete(nmp, cred, p);
2384
2385 out:
2386         NFSLOCKCLSTATE();
2387         clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG;
2388         wakeup(&clp->nfsc_flags);
2389         nfsv4_unlock(&clp->nfsc_lock, 0);
2390         NFSUNLOCKCLSTATE();
2391         if (tcred != NULL)
2392                 NFSFREECRED(tcred);
2393 }
2394
2395 /*
2396  * This function is called when a server replies with NFSERR_EXPIRED.
2397  * It deletes all state for the client and does a fresh SetClientId/confirm.
2398  * XXX Someday it should post a signal to the process(es) that hold the
2399  * state, so they know that lock state has been lost.
2400  */
2401 int
2402 nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p)
2403 {
2404         struct nfsmount *nmp;
2405         struct ucred *cred;
2406         int igotlock = 0, error, trycnt;
2407
2408         /*
2409          * If the clientid has gone away or a new SetClientid has already
2410          * been done, just return ok.
2411          */
2412         if (clp == NULL || clidrev != clp->nfsc_clientidrev)
2413                 return (0);
2414
2415         /*
2416          * First, lock the client structure, so everyone else will
2417          * block when trying to use state. Also, use NFSCLFLAGS_EXPIREIT so
2418          * that only one thread does the work.
2419          */
2420         NFSLOCKCLSTATE();
2421         clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT;
2422         do {
2423                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2424                     NFSCLSTATEMUTEXPTR, NULL);
2425         } while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT));
2426         if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) {
2427                 if (igotlock)
2428                         nfsv4_unlock(&clp->nfsc_lock, 0);
2429                 NFSUNLOCKCLSTATE();
2430                 return (0);
2431         }
2432         clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2433         NFSUNLOCKCLSTATE();
2434
2435         nmp = clp->nfsc_nmp;
2436         if (nmp == NULL)
2437                 panic("nfscl expired");
2438         cred = newnfs_getcred();
2439         trycnt = 5;
2440         do {
2441                 error = nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
2442         } while ((error == NFSERR_STALECLIENTID ||
2443              error == NFSERR_BADSESSION ||
2444              error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2445         if (error) {
2446                 NFSLOCKCLSTATE();
2447                 clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2448         } else {
2449                 /*
2450                  * Expire the state for the client.
2451                  */
2452                 nfscl_expireclient(clp, nmp, cred, p);
2453                 NFSLOCKCLSTATE();
2454                 clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2455                 clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2456         }
2457         clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG);
2458         wakeup(&clp->nfsc_flags);
2459         nfsv4_unlock(&clp->nfsc_lock, 0);
2460         NFSUNLOCKCLSTATE();
2461         NFSFREECRED(cred);
2462         return (error);
2463 }
2464
2465 /*
2466  * This function inserts a lock in the list after insert_lop.
2467  */
2468 static void
2469 nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop,
2470     struct nfscllock *insert_lop, int local)
2471 {
2472
2473         if ((struct nfscllockowner *)insert_lop == lp)
2474                 LIST_INSERT_HEAD(&lp->nfsl_lock, new_lop, nfslo_list);
2475         else
2476                 LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list);
2477         if (local)
2478                 nfsstatsv1.cllocallocks++;
2479         else
2480                 nfsstatsv1.cllocks++;
2481 }
2482
2483 /*
2484  * This function updates the locking for a lock owner and given file. It
2485  * maintains a list of lock ranges ordered on increasing file offset that
2486  * are NFSCLLOCK_READ or NFSCLLOCK_WRITE and non-overlapping (aka POSIX style).
2487  * It always adds new_lop to the list and sometimes uses the one pointed
2488  * at by other_lopp.
2489  * Returns 1 if the locks were modified, 0 otherwise.
2490  */
2491 static int
2492 nfscl_updatelock(struct nfscllockowner *lp, struct nfscllock **new_lopp,
2493     struct nfscllock **other_lopp, int local)
2494 {
2495         struct nfscllock *new_lop = *new_lopp;
2496         struct nfscllock *lop, *tlop, *ilop;
2497         struct nfscllock *other_lop;
2498         int unlock = 0, modified = 0;
2499         u_int64_t tmp;
2500
2501         /*
2502          * Work down the list until the lock is merged.
2503          */
2504         if (new_lop->nfslo_type == F_UNLCK)
2505                 unlock = 1;
2506         ilop = (struct nfscllock *)lp;
2507         lop = LIST_FIRST(&lp->nfsl_lock);
2508         while (lop != NULL) {
2509             /*
2510              * Only check locks for this file that aren't before the start of
2511              * new lock's range.
2512              */
2513             if (lop->nfslo_end >= new_lop->nfslo_first) {
2514                 if (new_lop->nfslo_end < lop->nfslo_first) {
2515                     /*
2516                      * If the new lock ends before the start of the
2517                      * current lock's range, no merge, just insert
2518                      * the new lock.
2519                      */
2520                     break;
2521                 }
2522                 if (new_lop->nfslo_type == lop->nfslo_type ||
2523                     (new_lop->nfslo_first <= lop->nfslo_first &&
2524                      new_lop->nfslo_end >= lop->nfslo_end)) {
2525                     /*
2526                      * This lock can be absorbed by the new lock/unlock.
2527                      * This happens when it covers the entire range
2528                      * of the old lock or is contiguous
2529                      * with the old lock and is of the same type or an
2530                      * unlock.
2531                      */
2532                     if (new_lop->nfslo_type != lop->nfslo_type ||
2533                         new_lop->nfslo_first != lop->nfslo_first ||
2534                         new_lop->nfslo_end != lop->nfslo_end)
2535                         modified = 1;
2536                     if (lop->nfslo_first < new_lop->nfslo_first)
2537                         new_lop->nfslo_first = lop->nfslo_first;
2538                     if (lop->nfslo_end > new_lop->nfslo_end)
2539                         new_lop->nfslo_end = lop->nfslo_end;
2540                     tlop = lop;
2541                     lop = LIST_NEXT(lop, nfslo_list);
2542                     nfscl_freelock(tlop, local);
2543                     continue;
2544                 }
2545
2546                 /*
2547                  * All these cases are for contiguous locks that are not the
2548                  * same type, so they can't be merged.
2549                  */
2550                 if (new_lop->nfslo_first <= lop->nfslo_first) {
2551                     /*
2552                      * This case is where the new lock overlaps with the
2553                      * first part of the old lock. Move the start of the
2554                      * old lock to just past the end of the new lock. The
2555                      * new lock will be inserted in front of the old, since
2556                      * ilop hasn't been updated. (We are done now.)
2557                      */
2558                     if (lop->nfslo_first != new_lop->nfslo_end) {
2559                         lop->nfslo_first = new_lop->nfslo_end;
2560                         modified = 1;
2561                     }
2562                     break;
2563                 }
2564                 if (new_lop->nfslo_end >= lop->nfslo_end) {
2565                     /*
2566                      * This case is where the new lock overlaps with the
2567                      * end of the old lock's range. Move the old lock's
2568                      * end to just before the new lock's first and insert
2569                      * the new lock after the old lock.
2570                      * Might not be done yet, since the new lock could
2571                      * overlap further locks with higher ranges.
2572                      */
2573                     if (lop->nfslo_end != new_lop->nfslo_first) {
2574                         lop->nfslo_end = new_lop->nfslo_first;
2575                         modified = 1;
2576                     }
2577                     ilop = lop;
2578                     lop = LIST_NEXT(lop, nfslo_list);
2579                     continue;
2580                 }
2581                 /*
2582                  * The final case is where the new lock's range is in the
2583                  * middle of the current lock's and splits the current lock
2584                  * up. Use *other_lopp to handle the second part of the
2585                  * split old lock range. (We are done now.)
2586                  * For unlock, we use new_lop as other_lop and tmp, since
2587                  * other_lop and new_lop are the same for this case.
2588                  * We noted the unlock case above, so we don't need
2589                  * new_lop->nfslo_type any longer.
2590                  */
2591                 tmp = new_lop->nfslo_first;
2592                 if (unlock) {
2593                     other_lop = new_lop;
2594                     *new_lopp = NULL;
2595                 } else {
2596                     other_lop = *other_lopp;
2597                     *other_lopp = NULL;
2598                 }
2599                 other_lop->nfslo_first = new_lop->nfslo_end;
2600                 other_lop->nfslo_end = lop->nfslo_end;
2601                 other_lop->nfslo_type = lop->nfslo_type;
2602                 lop->nfslo_end = tmp;
2603                 nfscl_insertlock(lp, other_lop, lop, local);
2604                 ilop = lop;
2605                 modified = 1;
2606                 break;
2607             }
2608             ilop = lop;
2609             lop = LIST_NEXT(lop, nfslo_list);
2610             if (lop == NULL)
2611                 break;
2612         }
2613
2614         /*
2615          * Insert the new lock in the list at the appropriate place.
2616          */
2617         if (!unlock) {
2618                 nfscl_insertlock(lp, new_lop, ilop, local);
2619                 *new_lopp = NULL;
2620                 modified = 1;
2621         }
2622         return (modified);
2623 }
2624
2625 /*
2626  * This function must be run as a kernel thread.
2627  * It does Renew Ops and recovery, when required.
2628  */
2629 void
2630 nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p)
2631 {
2632         struct nfsclowner *owp, *nowp;
2633         struct nfsclopen *op;
2634         struct nfscllockowner *lp, *nlp;
2635         struct nfscldeleghead dh;
2636         struct nfscldeleg *dp, *ndp;
2637         struct ucred *cred;
2638         u_int32_t clidrev;
2639         int error, cbpathdown, islept, igotlock, ret, clearok;
2640         uint32_t recover_done_time = 0;
2641         time_t mytime;
2642         static time_t prevsec = 0;
2643         struct nfscllockownerfh *lfhp, *nlfhp;
2644         struct nfscllockownerfhhead lfh;
2645         struct nfscllayout *lyp, *nlyp;
2646         struct nfscldevinfo *dip, *ndip;
2647         struct nfscllayouthead rlh;
2648         struct nfsclrecalllayout *recallp;
2649         struct nfsclds *dsp;
2650         bool retok;
2651         struct mount *mp;
2652         vnode_t vp;
2653
2654         cred = newnfs_getcred();
2655         NFSLOCKCLSTATE();
2656         clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD;
2657         mp = clp->nfsc_nmp->nm_mountp;
2658         NFSUNLOCKCLSTATE();
2659         for(;;) {
2660                 newnfs_setroot(cred);
2661                 cbpathdown = 0;
2662                 if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) {
2663                         /*
2664                          * Only allow one full recover within 1/2 of the lease
2665                          * duration (nfsc_renew).
2666                          * retok is value/result.  If passed in set to true,
2667                          * it indicates only a CreateSession operation should
2668                          * be attempted.
2669                          * If it is returned true, it indicates that the
2670                          * recovery only required a CreateSession.
2671                          */
2672                         retok = true;
2673                         if (recover_done_time < NFSD_MONOSEC) {
2674                                 recover_done_time = NFSD_MONOSEC +
2675                                     clp->nfsc_renew;
2676                                 retok = false;
2677                         }
2678                         NFSCL_DEBUG(1, "Doing recovery, only "
2679                             "createsession=%d\n", retok);
2680                         nfscl_recover(clp, &retok, cred, p);
2681                 }
2682                 if (clp->nfsc_expire <= NFSD_MONOSEC &&
2683                     (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) {
2684                         clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
2685                         clidrev = clp->nfsc_clientidrev;
2686                         error = nfsrpc_renew(clp, NULL, cred, p);
2687                         if (error == NFSERR_CBPATHDOWN)
2688                             cbpathdown = 1;
2689                         else if (error == NFSERR_STALECLIENTID ||
2690                             error == NFSERR_BADSESSION) {
2691                             NFSLOCKCLSTATE();
2692                             clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2693                             NFSUNLOCKCLSTATE();
2694                         } else if (error == NFSERR_EXPIRED)
2695                             (void) nfscl_hasexpired(clp, clidrev, p);
2696                 }
2697
2698 checkdsrenew:
2699                 if (NFSHASNFSV4N(clp->nfsc_nmp)) {
2700                         /* Do renews for any DS sessions. */
2701                         NFSLOCKMNT(clp->nfsc_nmp);
2702                         /* Skip first entry, since the MDS is handled above. */
2703                         dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess);
2704                         if (dsp != NULL)
2705                                 dsp = TAILQ_NEXT(dsp, nfsclds_list);
2706                         while (dsp != NULL) {
2707                                 if (dsp->nfsclds_expire <= NFSD_MONOSEC &&
2708                                     dsp->nfsclds_sess.nfsess_defunct == 0) {
2709                                         dsp->nfsclds_expire = NFSD_MONOSEC +
2710                                             clp->nfsc_renew;
2711                                         NFSUNLOCKMNT(clp->nfsc_nmp);
2712                                         (void)nfsrpc_renew(clp, dsp, cred, p);
2713                                         goto checkdsrenew;
2714                                 }
2715                                 dsp = TAILQ_NEXT(dsp, nfsclds_list);
2716                         }
2717                         NFSUNLOCKMNT(clp->nfsc_nmp);
2718                 }
2719
2720                 TAILQ_INIT(&dh);
2721                 NFSLOCKCLSTATE();
2722                 if (cbpathdown)
2723                         /* It's a Total Recall! */
2724                         nfscl_totalrecall(clp);
2725
2726                 /*
2727                  * Now, handle defunct owners.
2728                  */
2729                 LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
2730                         if (LIST_EMPTY(&owp->nfsow_open)) {
2731                                 if (owp->nfsow_defunct != 0)
2732                                         nfscl_freeopenowner(owp, 0);
2733                         }
2734                 }
2735
2736                 /*
2737                  * Do the recall on any delegations. To avoid trouble, always
2738                  * come back up here after having slept.
2739                  */
2740                 igotlock = 0;
2741 tryagain:
2742                 dp = TAILQ_FIRST(&clp->nfsc_deleg);
2743                 while (dp != NULL) {
2744                         ndp = TAILQ_NEXT(dp, nfsdl_list);
2745                         if ((dp->nfsdl_flags & NFSCLDL_RECALL)) {
2746                                 /*
2747                                  * Wait for outstanding I/O ops to be done.
2748                                  */
2749                                 if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
2750                                     if (igotlock) {
2751                                         nfsv4_unlock(&clp->nfsc_lock, 0);
2752                                         igotlock = 0;
2753                                     }
2754                                     dp->nfsdl_rwlock.nfslock_lock |=
2755                                         NFSV4LOCK_WANTED;
2756                                     msleep(&dp->nfsdl_rwlock,
2757                                         NFSCLSTATEMUTEXPTR, PVFS, "nfscld",
2758                                         5 * hz);
2759                                     if (NFSCL_FORCEDISM(mp))
2760                                         goto terminate;
2761                                     goto tryagain;
2762                                 }
2763                                 while (!igotlock) {
2764                                     igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
2765                                         &islept, NFSCLSTATEMUTEXPTR, mp);
2766                                     if (igotlock == 0 && NFSCL_FORCEDISM(mp))
2767                                         goto terminate;
2768                                     if (islept)
2769                                         goto tryagain;
2770                                 }
2771                                 NFSUNLOCKCLSTATE();
2772                                 newnfs_copycred(&dp->nfsdl_cred, cred);
2773                                 ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp,
2774                                     NULL, cred, p, 1, &vp);
2775                                 if (!ret) {
2776                                     nfscl_cleandeleg(dp);
2777                                     TAILQ_REMOVE(&clp->nfsc_deleg, dp,
2778                                         nfsdl_list);
2779                                     LIST_REMOVE(dp, nfsdl_hash);
2780                                     TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2781                                     nfscl_delegcnt--;
2782                                     nfsstatsv1.cldelegates--;
2783                                 }
2784                                 NFSLOCKCLSTATE();
2785                                 /*
2786                                  * The nfsc_lock must be released before doing
2787                                  * vrele(), since it might call nfs_inactive().
2788                                  * For the unlikely case where the vnode failed
2789                                  * to be acquired by nfscl_recalldeleg(), a
2790                                  * VOP_RECLAIM() should be in progress and it
2791                                  * will return the delegation.
2792                                  */
2793                                 nfsv4_unlock(&clp->nfsc_lock, 0);
2794                                 igotlock = 0;
2795                                 if (vp != NULL) {
2796                                         NFSUNLOCKCLSTATE();
2797                                         vrele(vp);
2798                                         NFSLOCKCLSTATE();
2799                                 }
2800                                 goto tryagain;
2801                         }
2802                         dp = ndp;
2803                 }
2804
2805                 /*
2806                  * Clear out old delegations, if we are above the high water
2807                  * mark. Only clear out ones with no state related to them.
2808                  * The tailq list is in LRU order.
2809                  */
2810                 dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead);
2811                 while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) {
2812                     ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list);
2813                     if (dp->nfsdl_rwlock.nfslock_usecnt == 0 &&
2814                         dp->nfsdl_rwlock.nfslock_lock == 0 &&
2815                         dp->nfsdl_timestamp < NFSD_MONOSEC &&
2816                         (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_ZAPPED |
2817                           NFSCLDL_NEEDRECLAIM | NFSCLDL_DELEGRET)) == 0) {
2818                         clearok = 1;
2819                         LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
2820                             op = LIST_FIRST(&owp->nfsow_open);
2821                             if (op != NULL) {
2822                                 clearok = 0;
2823                                 break;
2824                             }
2825                         }
2826                         if (clearok) {
2827                             LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
2828                                 if (!LIST_EMPTY(&lp->nfsl_lock)) {
2829                                     clearok = 0;
2830                                     break;
2831                                 }
2832                             }
2833                         }
2834                         if (clearok) {
2835                             TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
2836                             LIST_REMOVE(dp, nfsdl_hash);
2837                             TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2838                             nfscl_delegcnt--;
2839                             nfsstatsv1.cldelegates--;
2840                         }
2841                     }
2842                     dp = ndp;
2843                 }
2844                 if (igotlock)
2845                         nfsv4_unlock(&clp->nfsc_lock, 0);
2846
2847                 /*
2848                  * Do the recall on any layouts. To avoid trouble, always
2849                  * come back up here after having slept.
2850                  */
2851                 TAILQ_INIT(&rlh);
2852 tryagain2:
2853                 TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) {
2854                         if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) {
2855                                 /*
2856                                  * Wait for outstanding I/O ops to be done.
2857                                  */
2858                                 if (lyp->nfsly_lock.nfslock_usecnt > 0 ||
2859                                     (lyp->nfsly_lock.nfslock_lock &
2860                                      NFSV4LOCK_LOCK) != 0) {
2861                                         lyp->nfsly_lock.nfslock_lock |=
2862                                             NFSV4LOCK_WANTED;
2863                                         msleep(&lyp->nfsly_lock.nfslock_lock,
2864                                             NFSCLSTATEMUTEXPTR, PVFS, "nfslyp",
2865                                             5 * hz);
2866                                         if (NFSCL_FORCEDISM(mp))
2867                                             goto terminate;
2868                                         goto tryagain2;
2869                                 }
2870                                 /* Move the layout to the recall list. */
2871                                 TAILQ_REMOVE(&clp->nfsc_layout, lyp,
2872                                     nfsly_list);
2873                                 LIST_REMOVE(lyp, nfsly_hash);
2874                                 TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list);
2875
2876                                 /* Handle any layout commits. */
2877                                 if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) &&
2878                                     (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
2879                                         lyp->nfsly_flags &= ~NFSLY_WRITTEN;
2880                                         NFSUNLOCKCLSTATE();
2881                                         NFSCL_DEBUG(3, "do layoutcommit\n");
2882                                         nfscl_dolayoutcommit(clp->nfsc_nmp, lyp,
2883                                             cred, p);
2884                                         NFSLOCKCLSTATE();
2885                                         goto tryagain2;
2886                                 }
2887                         }
2888                 }
2889
2890                 /* Now, look for stale layouts. */
2891                 lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead);
2892                 while (lyp != NULL) {
2893                         nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list);
2894                         if (lyp->nfsly_timestamp < NFSD_MONOSEC &&
2895                             (lyp->nfsly_flags & NFSLY_RECALL) == 0 &&
2896                             lyp->nfsly_lock.nfslock_usecnt == 0 &&
2897                             lyp->nfsly_lock.nfslock_lock == 0) {
2898                                 NFSCL_DEBUG(4, "ret stale lay=%d\n",
2899                                     nfscl_layoutcnt);
2900                                 recallp = malloc(sizeof(*recallp),
2901                                     M_NFSLAYRECALL, M_NOWAIT);
2902                                 if (recallp == NULL)
2903                                         break;
2904                                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE,
2905                                     lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX,
2906                                     lyp->nfsly_stateid.seqid, 0, 0, NULL,
2907                                     recallp);
2908                         }
2909                         lyp = nlyp;
2910                 }
2911
2912                 /*
2913                  * Free up any unreferenced device info structures.
2914                  */
2915                 LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) {
2916                         if (dip->nfsdi_layoutrefs == 0 &&
2917                             dip->nfsdi_refcnt == 0) {
2918                                 NFSCL_DEBUG(4, "freeing devinfo\n");
2919                                 LIST_REMOVE(dip, nfsdi_list);
2920                                 nfscl_freedevinfo(dip);
2921                         }
2922                 }
2923                 NFSUNLOCKCLSTATE();
2924
2925                 /* Do layout return(s), as required. */
2926                 TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) {
2927                         TAILQ_REMOVE(&rlh, lyp, nfsly_list);
2928                         NFSCL_DEBUG(4, "ret layout\n");
2929                         nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p);
2930                         nfscl_freelayout(lyp);
2931                 }
2932
2933                 /*
2934                  * Delegreturn any delegations cleaned out or recalled.
2935                  */
2936                 TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
2937                         newnfs_copycred(&dp->nfsdl_cred, cred);
2938                         (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
2939                         TAILQ_REMOVE(&dh, dp, nfsdl_list);
2940                         free(dp, M_NFSCLDELEG);
2941                 }
2942
2943                 SLIST_INIT(&lfh);
2944                 /*
2945                  * Call nfscl_cleanupkext() once per second to check for
2946                  * open/lock owners where the process has exited.
2947                  */
2948                 mytime = NFSD_MONOSEC;
2949                 if (prevsec != mytime) {
2950                         prevsec = mytime;
2951                         nfscl_cleanupkext(clp, &lfh);
2952                 }
2953
2954                 /*
2955                  * Do a ReleaseLockOwner for all lock owners where the
2956                  * associated process no longer exists, as found by
2957                  * nfscl_cleanupkext().
2958                  */
2959                 newnfs_setroot(cred);
2960                 SLIST_FOREACH_SAFE(lfhp, &lfh, nfslfh_list, nlfhp) {
2961                         LIST_FOREACH_SAFE(lp, &lfhp->nfslfh_lock, nfsl_list,
2962                             nlp) {
2963                                 (void)nfsrpc_rellockown(clp->nfsc_nmp, lp,
2964                                     lfhp->nfslfh_fh, lfhp->nfslfh_len, cred,
2965                                     p);
2966                                 nfscl_freelockowner(lp, 0);
2967                         }
2968                         free(lfhp, M_TEMP);
2969                 }
2970                 SLIST_INIT(&lfh);
2971
2972                 NFSLOCKCLSTATE();
2973                 if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0)
2974                         (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl",
2975                             hz);
2976 terminate:
2977                 if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) {
2978                         clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD;
2979                         NFSUNLOCKCLSTATE();
2980                         NFSFREECRED(cred);
2981                         wakeup((caddr_t)clp);
2982                         return;
2983                 }
2984                 NFSUNLOCKCLSTATE();
2985         }
2986 }
2987
2988 /*
2989  * Initiate state recovery. Called when NFSERR_STALECLIENTID,
2990  * NFSERR_STALESTATEID or NFSERR_BADSESSION is received.
      *
      * clp is the client whose state needs recovering; a NULL clp is
      * silently ignored. Otherwise NFSCLFLAGS_RECOVER is set in
      * clp->nfsc_flags while holding the client state mutex and, after the
      * mutex has been released, a wakeup() is done on clp.
2991  */
2992 void
2993 nfscl_initiate_recovery(struct nfsclclient *clp)
2994 {
2995
2996         if (clp == NULL)
2997                 return;
             /* The flag is only modified with the client state mutex held. */
2998         NFSLOCKCLSTATE();
2999         clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
3000         NFSUNLOCKCLSTATE();
             /* Wake up anything sleeping on clp so that it sees the flag. */
3001         wakeup((caddr_t)clp);
3002 }
3003
3004 /*
3005  * Dump out the state stuff for debugging.
      *
      * The client is taken from nmp->nm_clp; if it is NULL a message is
      * printed and nothing else is done.
      * Each of the remaining arguments, when non-zero, enables the printf()
      * for one level of the state hierarchy:
      *   openowner - open owners (only those that have at least one open)
      *   opens     - opens
      *   lockowner - lock owners
      *   locks     - locks (type, first and end)
      * The open owners linked to each delegation are walked first, followed
      * by the open owners on the client's nfsc_owner list. The whole walk is
      * done with the client state mutex held.
3006  */
3007 void
3008 nfscl_dumpstate(struct nfsmount *nmp, int openowner, int opens,
3009     int lockowner, int locks)
3010 {
3011         struct nfsclclient *clp;
3012         struct nfsclowner *owp;
3013         struct nfsclopen *op;
3014         struct nfscllockowner *lp;
3015         struct nfscllock *lop;
3016         struct nfscldeleg *dp;
3017
3018         clp = nmp->nm_clp;
3019         if (clp == NULL) {
3020                 printf("nfscl dumpstate NULL clp\n");
3021                 return;
3022         }
3023         NFSLOCKCLSTATE();
             /* First pass: the open owners hanging off each delegation. */
3024         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
3025           LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3026             if (openowner && !LIST_EMPTY(&owp->nfsow_open))
3027                 printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
3028                     owp->nfsow_owner[0], owp->nfsow_owner[1],
3029                     owp->nfsow_owner[2], owp->nfsow_owner[3],
3030                     owp->nfsow_seqid);
3031             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3032                 if (opens)
3033                     printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
3034                         op->nfso_stateid.other[0], op->nfso_stateid.other[1],
3035                         op->nfso_stateid.other[2], op->nfso_opencnt,
3036                         op->nfso_fh[12]);
3037                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
3038                     if (lockowner)
3039                         printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
3040                             lp->nfsl_owner[0], lp->nfsl_owner[1],
3041                             lp->nfsl_owner[2], lp->nfsl_owner[3],
3042                             lp->nfsl_seqid,
3043                             lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
3044                             lp->nfsl_stateid.other[2]);
                         /*
                          * NOTE(review): %ju is an unsigned conversion but the
                          * arguments are cast to intmax_t (here and in the
                          * second pass below); a (uintmax_t) cast would match
                          * the format - confirm against the field types.
                          */
3045                     LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3046                         if (locks)
3047 #ifdef __FreeBSD__
3048                             printf("lck typ=%d fst=%ju end=%ju\n",
3049                                 lop->nfslo_type, (intmax_t)lop->nfslo_first,
3050                                 (intmax_t)lop->nfslo_end);
3051 #else
3052                             printf("lck typ=%d fst=%qd end=%qd\n",
3053                                 lop->nfslo_type, lop->nfslo_first,
3054                                 lop->nfslo_end);
3055 #endif
3056                     }
3057                 }
3058             }
3059           }
3060         }
             /*
              * Second pass: the open owners on the client's own list, printed
              * in exactly the same format as above.
              */
3061         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3062             if (openowner && !LIST_EMPTY(&owp->nfsow_open))
3063                 printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
3064                     owp->nfsow_owner[0], owp->nfsow_owner[1],
3065                     owp->nfsow_owner[2], owp->nfsow_owner[3],
3066                     owp->nfsow_seqid);
3067             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3068                 if (opens)
3069                     printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
3070                         op->nfso_stateid.other[0], op->nfso_stateid.other[1],
3071                         op->nfso_stateid.other[2], op->nfso_opencnt,
3072                         op->nfso_fh[12]);
3073                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
3074                     if (lockowner)
3075                         printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
3076                             lp->nfsl_owner[0], lp->nfsl_owner[1],
3077                             lp->nfsl_owner[2], lp->nfsl_owner[3],
3078                             lp->nfsl_seqid,
3079                             lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
3080                             lp->nfsl_stateid.other[2]);
3081                     LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3082                         if (locks)
3083 #ifdef __FreeBSD__
3084                             printf("lck typ=%d fst=%ju end=%ju\n",
3085                                 lop->nfslo_type, (intmax_t)lop->nfslo_first,
3086                                 (intmax_t)lop->nfslo_end);
3087 #else
3088                             printf("lck typ=%d fst=%qd end=%qd\n",
3089                                 lop->nfslo_type, lop->nfslo_first,
3090                                 lop->nfslo_end);
3091 #endif
3092                     }
3093                 }
3094             }
3095         }
3096         NFSUNLOCKCLSTATE();
3097 }
3098
3099 /*
3100  * Check for duplicate open owners and opens.
3101  * (Only used as a diagnostic aid.)
      *
      * vp identifies both the mount (and therefore the client) and the file
      * handle used by the duplicate open check. dupopens, when non-zero,
      * makes opens of the same file by different owners get reported as well.
      * Three searches are done in turn (duplicate owners, duplicate
      * stateids, duplicate opens); the first hit prints a one line tag,
      * dumps the open owners and opens via nfscl_dumpstate() and returns.
      * Nothing is printed when no duplicate is found.
3102  */
3103 void
3104 nfscl_dupopen(vnode_t vp, int dupopens)
3105 {
3106         struct nfsclclient *clp;
3107         struct nfsclowner *owp, *owp2;
3108         struct nfsclopen *op, *op2;
3109         struct nfsfh *nfhp;
3110
3111         clp = VFSTONFS(vp->v_mount)->nm_clp;
3112         if (clp == NULL) {
3113                 printf("nfscl dupopen NULL clp\n");
3114                 return;
3115         }
3116         nfhp = VTONFS(vp)->n_fhp;
3117         NFSLOCKCLSTATE();
3118
3119         /*
3120          * First, search for duplicate owners.
3121          * These should never happen!
3122          */
3123         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3124             LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3125                 if (owp != owp2 &&
3126                     !NFSBCMP(owp->nfsow_owner, owp2->nfsow_owner,
3127                     NFSV4CL_LOCKNAMELEN)) {
                             /*
                              * Drop the state mutex before dumping, since
                              * nfscl_dumpstate() acquires it itself.
                              */
3128                         NFSUNLOCKCLSTATE();
3129                         printf("DUP OWNER\n");
3130                         nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0, 0);
3131                         return;
3132                 }
3133             }
3134         }
3135
3136         /*
3137          * Now, search for duplicate stateids.
3138          * These shouldn't happen, either.
              * (An open whose stateid "other" words are all zero is never
              * reported as a duplicate.)
3139          */
3140         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3141             LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3142                 LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3143                     LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3144                         if (op != op2 &&
3145                             (op->nfso_stateid.other[0] != 0 ||
3146                              op->nfso_stateid.other[1] != 0 ||
3147                              op->nfso_stateid.other[2] != 0) &&
3148                             op->nfso_stateid.other[0] == op2->nfso_stateid.other[0] &&
3149                             op->nfso_stateid.other[1] == op2->nfso_stateid.other[1] &&
3150                             op->nfso_stateid.other[2] == op2->nfso_stateid.other[2]) {
3151                             NFSUNLOCKCLSTATE();
3152                             printf("DUP STATEID\n");
3153                             nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0, 0);
3154                             return;
3155                         }
3156                     }
3157                 }
3158             }
3159         }
3160
3161         /*
3162          * Now search for duplicate opens.
3163          * Duplicate opens for the same owner
3164          * should never occur. Other duplicates are
3165          * possible and are checked for if "dupopens"
3166          * is true.
              * Only opens whose file handle matches that of vp are compared.
3167          */
3168         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3169             LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3170                 if (nfhp->nfh_len == op2->nfso_fhlen &&
3171                     !NFSBCMP(nfhp->nfh_fh, op2->nfso_fh, nfhp->nfh_len)) {
3172                     LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3173                         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3174                             if (op != op2 && nfhp->nfh_len == op->nfso_fhlen &&
3175                                 !NFSBCMP(nfhp->nfh_fh, op->nfso_fh, nfhp->nfh_len) &&
3176                                 (!NFSBCMP(op->nfso_own->nfsow_owner,
3177                                  op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN) ||
3178                                  dupopens)) {
                                     /*
                                      * Same owner name: "BADDUP OPEN".
                                      * Different owner (only reached when
                                      * dupopens is set): "DUP OPEN".
                                      */
3179                                 if (!NFSBCMP(op->nfso_own->nfsow_owner,
3180                                     op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
3181                                     NFSUNLOCKCLSTATE();
3182                                     printf("BADDUP OPEN\n");
3183                                 } else {
3184                                     NFSUNLOCKCLSTATE();
3185                                     printf("DUP OPEN\n");
3186                                 }
3187                                 nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0,
3188                                     0);
3189                                 return;
3190                             }
3191                         }
3192                     }
3193                 }
3194             }
3195         }
3196         NFSUNLOCKCLSTATE();
3197 }
3198
3199 /*
3200  * During close, find an open that needs to be dereferenced and
3201  * dereference it. If there are no more opens for this file,
3202  * log a message to that effect.
3203  * Opens aren't actually Close'd until VOP_INACTIVE() is performed
3204  * on the file's vnode.
3205  * This is the safe way, since it is difficult to identify
3206  * which open the close is for and I/O can be performed after the
3207  * close(2) system call when a file is mmap'd.
3208  * If it returns 0 for success, there will be a referenced
3209  * clp returned via clpp.
      *
      * Returns the error from nfscl_getcl() when that fails (*clpp is not
      * set in that case), otherwise 0. At most one nfso_opencnt is
      * decremented per call; 0 is still returned when no open with a
      * non-zero count could be found.
3210  */
3211 int
3212 nfscl_getclose(vnode_t vp, struct nfsclclient **clpp)
3213 {
3214         struct nfsclclient *clp;
3215         struct nfsclowner *owp;
3216         struct nfsclopen *op;
3217         struct nfscldeleg *dp;
3218         struct nfsfh *nfhp;
3219         int error, notdecr;
3220
3221         error = nfscl_getcl(vp->v_mount, NULL, NULL, false, &clp);
3222         if (error)
3223                 return (error);
3224         *clpp = clp;
3225
3226         nfhp = VTONFS(vp)->n_fhp;
             /* notdecr stays 1 until one open count has been decremented. */
3227         notdecr = 1;
3228         NFSLOCKCLSTATE();
3229         /*
3230          * First, look for one under a delegation that was locally issued
3231          * and just decrement the opencnt for it. Since all my Opens against
3232          * the server are DENY_NONE, I don't see a problem with hanging
3233          * onto them. (It is much easier to use one of the extant Opens
3234          * that I already have on the server when a Delegation is recalled
3235          * than to do fresh Opens.) Someday, I might need to rethink this, but.
3236          */
3237         dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3238         if (dp != NULL) {
3239                 LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3240                         op = LIST_FIRST(&owp->nfsow_open);
3241                         if (op != NULL) {
3242                                 /*
3243                                  * Since a delegation is for a file, there
3244                                  * should never be more than one open for
3245                                  * each openowner.
3246                                  */
3247                                 if (LIST_NEXT(op, nfso_list) != NULL)
3248                                         panic("nfscdeleg opens");
3249                                 if (notdecr && op->nfso_opencnt > 0) {
3250                                         notdecr = 0;
3251                                         op->nfso_opencnt--;
3252                                         break;
3253                                 }
3254                         }
3255                 }
3256         }
3257
3258         /* Now process the opens against the server. */
3259         LIST_FOREACH(op, NFSCLOPENHASH(clp, nfhp->nfh_fh, nfhp->nfh_len),
3260             nfso_hash) {
                     /* Only opens with a matching file handle are of interest. */
3261                 if (op->nfso_fhlen == nfhp->nfh_len &&
3262                     !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3263                     nfhp->nfh_len)) {
3264                         /* Found an open, decrement cnt if possible */
3265                         if (notdecr && op->nfso_opencnt > 0) {
3266                                 notdecr = 0;
3267                                 op->nfso_opencnt--;
3268                         }
3269                         /*
3270                          * There are more opens, so just return.
3271                          */
3272                         if (op->nfso_opencnt > 0) {
3273                                 NFSUNLOCKCLSTATE();
3274                                 return (0);
3275                         }
3276                 }
3277         }
3278         NFSUNLOCKCLSTATE();
             /* No open with a non-zero count was found anywhere. */
3279         if (notdecr)
3280                 printf("nfscl: never fnd open\n");
3281         return (0);
3282 }
3283
/*
 * Close the opens for the file referred to by vp.
 * The client is acquired via nfscl_getcl() and, on success, handed back
 * through clpp. With the client state mutex held, the opens and open
 * owners hanging off a delegation for the file (if there is one) are
 * freed and nfscl_retoncloselayout() is called for the file. Then
 * nfsrpc_doclose() is called, with the mutex dropped, for every open in
 * the file handle's hash chain that matches the file handle.
 * Returns the error from nfscl_getcl() when that fails, otherwise 0.
 */
3284 int
3285 nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p)
3286 {
3287         struct nfsclclient *clp;
3288         struct nfsclowner *owp, *nowp;
3289         struct nfsclopen *op;
3290         struct nfscldeleg *dp;
3291         struct nfsfh *nfhp;
3292         struct nfsclrecalllayout *recallp;
3293         int error;
3294
3295         error = nfscl_getcl(vp->v_mount, NULL, NULL, false, &clp);
3296         if (error)
3297                 return (error);
3298         *clpp = clp;
3299
3300         nfhp = VTONFS(vp)->n_fhp;
             /*
              * Allocate the recall structure with M_WAITOK before the state
              * mutex is acquired.
              */
3301         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
3302         NFSLOCKCLSTATE();
3303         /*
3304          * First get rid of the local Open structures, which should be no
3305          * longer in use.
3306          */
3307         dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3308         if (dp != NULL) {
3309                 LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
3310                         op = LIST_FIRST(&owp->nfsow_open);
3311                         if (op != NULL) {
3312                                 KASSERT((op->nfso_opencnt == 0),
3313                                     ("nfscl: bad open cnt on deleg"));
3314                                 nfscl_freeopen(op, 1);
3315                         }
3316                         nfscl_freeopenowner(owp, 1);
3317                 }
3318         }
3319
3320         /* Return any layouts marked return on close. */
3321         nfscl_retoncloselayout(vp, clp, nfhp->nfh_fh, nfhp->nfh_len, &recallp);
3322
3323         /* Now process the opens against the server. */
             /*
              * The state mutex is dropped around nfsrpc_doclose(), so the scan
              * of the hash chain is restarted from the top after each call.
              * NOTE(review): this relies on nfsrpc_doclose() taking op off the
              * hash chain, otherwise the loop would not terminate - confirm.
              */
3324 lookformore:
3325         LIST_FOREACH(op, NFSCLOPENHASH(clp, nfhp->nfh_fh, nfhp->nfh_len),
3326             nfso_hash) {
3327                 if (op->nfso_fhlen == nfhp->nfh_len &&
3328                     !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3329                     nfhp->nfh_len)) {
3330                         /* Found an open, close it. */
3331 #ifdef DIAGNOSTIC
3332                         KASSERT((op->nfso_opencnt == 0),
3333                             ("nfscl: bad open cnt on server (%d)",
3334                              op->nfso_opencnt));
3335 #endif
3336                         NFSUNLOCKCLSTATE();
3337                         nfsrpc_doclose(VFSTONFS(vp->v_mount), op, p);
3338                         NFSLOCKCLSTATE();
3339                         goto lookformore;
3340                 }
3341         }
3342         NFSUNLOCKCLSTATE();
3343         /*
3344          * recallp has been set NULL by nfscl_retoncloselayout() if it was
3345          * used by the function, but calling free() with a NULL pointer is ok.
3346          */
3347         free(recallp, M_NFSLAYRECALL);
3348         return (0);
3349 }
3350
3351 /*
3352  * Return all delegations on this client.
3353  * (Must be called with client sleep lock.)
      *
      * For every delegation on clp->nfsc_deleg: nfscl_cleandeleg() is
      * called, a DelegReturn is attempted via nfscl_trydelegreturn() (its
      * result is ignored) and the delegation is released with
      * nfscl_freedeleg(). The credential used for the RPCs is obtained from
      * newnfs_getcred() and released before returning.
3354  */
3355 static void
3356 nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p)
3357 {
3358         struct nfscldeleg *dp, *ndp;
3359         struct ucred *cred;
3360
3361         cred = newnfs_getcred();
             /* The _SAFE variant is needed, since dp is released in the loop. */
3362         TAILQ_FOREACH_SAFE(dp, &clp->nfsc_deleg, nfsdl_list, ndp) {
3363                 nfscl_cleandeleg(dp);
3364                 (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3365                 nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
3366         }
3367         NFSFREECRED(cred);
3368 }
3369
3370 /*
3371  * Return any delegation for this vp.
      *
      * Nothing is done unless NFSMNTP_DELEGISSUED is set in the mount's
      * nm_privflag. Otherwise the delegation for vp's file handle is looked
      * up with the client state mutex held; if one is found it is cleaned
      * and taken off the client, the mutex is dropped, a DelegReturn is
      * attempted and the delegation structure is freed here.
3372  */
3373 void
3374 nfscl_delegreturnvp(vnode_t vp, NFSPROC_T *p)
3375 {
3376         struct nfsclclient *clp;
3377         struct nfscldeleg *dp;
3378         struct ucred *cred;
3379         struct nfsnode *np;
3380         struct nfsmount *nmp;
3381
3382         nmp = VFSTONFS(vp->v_mount);
             /* Return early when NFSMNTP_DELEGISSUED is not set for the mount. */
3383         NFSLOCKMNT(nmp);
3384         if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
3385                 NFSUNLOCKMNT(nmp);
3386                 return;
3387         }
3388         NFSUNLOCKMNT(nmp);
3389         np = VTONFS(vp);
3390         cred = newnfs_getcred();
3391         dp = NULL;
3392         NFSLOCKCLSTATE();
3393         clp = nmp->nm_clp;
3394         if (clp != NULL)
3395                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
3396                     np->n_fhp->nfh_len);
3397         if (dp != NULL) {
3398                 nfscl_cleandeleg(dp);
                     /*
                      * NOTE(review): the "false" argument presumably tells
                      * nfscl_freedeleg() not to free dp, since dp is still
                      * used for the RPC and freed explicitly below - confirm.
                      */
3399                 nfscl_freedeleg(&clp->nfsc_deleg, dp, false);
3400                 NFSUNLOCKCLSTATE();
                     /* The RPC is done without the state mutex held. */
3401                 newnfs_copycred(&dp->nfsdl_cred, cred);
3402                 nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3403                 free(dp, M_NFSCLDELEG);
3404         } else
3405                 NFSUNLOCKCLSTATE();
3406         NFSFREECRED(cred);
3407 }
3408
3409 /*
3410  * Do a callback RPC.
3411  */
3412 void
3413 nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
3414 {
3415         int clist, gotseq_ok, i, j, k, op, rcalls;
3416         u_int32_t *tl;
3417         struct nfsclclient *clp;
3418         struct nfscldeleg *dp = NULL;
3419         int numops, taglen = -1, error = 0, trunc __unused;
3420         u_int32_t minorvers = 0, retops = 0, *retopsp = NULL, *repp, cbident;
3421         u_char tag[NFSV4_SMALLSTR + 1], *tagstr;
3422         vnode_t vp = NULL;
3423         struct nfsnode *np;
3424         struct vattr va;
3425         struct nfsfh *nfhp;
3426         mount_t mp;
3427         nfsattrbit_t attrbits, rattrbits;
3428         nfsv4stateid_t stateid;
3429         uint32_t seqid, slotid = 0, highslot, cachethis __unused;
3430         uint8_t sessionid[NFSX_V4SESSIONID];
3431         struct mbuf *rep;
3432         struct nfscllayout *lyp;
3433         uint64_t filesid[2], len, off;
3434         int changed, gotone, laytype, recalltype;
3435         uint32_t iomode;
3436         struct nfsclrecalllayout *recallp = NULL;
3437         struct nfsclsession *tsep;
3438
3439         gotseq_ok = 0;
3440         nfsrvd_rephead(nd);
3441         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3442         taglen = fxdr_unsigned(int, *tl);
3443         if (taglen < 0) {
3444                 error = EBADRPC;
3445                 goto nfsmout;
3446         }
3447         if (taglen <= NFSV4_SMALLSTR)
3448                 tagstr = tag;
3449         else
3450                 tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK);
3451         error = nfsrv_mtostr(nd, tagstr, taglen);
3452         if (error) {
3453                 if (taglen > NFSV4_SMALLSTR)
3454                         free(tagstr, M_TEMP);
3455                 taglen = -1;
3456                 goto nfsmout;
3457         }
3458         (void) nfsm_strtom(nd, tag, taglen);
3459         if (taglen > NFSV4_SMALLSTR) {
3460                 free(tagstr, M_TEMP);
3461         }
3462         NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED);
3463         NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3464         minorvers = fxdr_unsigned(u_int32_t, *tl++);
3465         if (minorvers != NFSV4_MINORVERSION &&
3466             minorvers != NFSV41_MINORVERSION &&
3467             minorvers != NFSV42_MINORVERSION)
3468                 nd->nd_repstat = NFSERR_MINORVERMISMATCH;
3469         cbident = fxdr_unsigned(u_int32_t, *tl++);
3470         if (nd->nd_repstat)
3471                 numops = 0;
3472         else
3473                 numops = fxdr_unsigned(int, *tl);
3474         /*
3475          * Loop around doing the sub ops.
3476          */
3477         for (i = 0; i < numops; i++) {
3478                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3479                 NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED);
3480                 *repp++ = *tl;
3481                 op = fxdr_unsigned(int, *tl);
3482                 if (op < NFSV4OP_CBGETATTR ||
3483                    (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) ||
3484                    (op > NFSV4OP_CBNOTIFYDEVID &&
3485                     minorvers == NFSV41_MINORVERSION) ||
3486                    (op > NFSV4OP_CBOFFLOAD &&
3487                     minorvers == NFSV42_MINORVERSION)) {
3488                     nd->nd_repstat = NFSERR_OPILLEGAL;
3489                     *repp = nfscl_errmap(nd, minorvers);
3490                     retops++;
3491                     break;
3492                 }
3493                 nd->nd_procnum = op;
3494                 if (op < NFSV42_CBNOPS)
3495                         nfsstatsv1.cbrpccnt[nd->nd_procnum]++;
3496                 switch (op) {
3497                 case NFSV4OP_CBGETATTR:
3498                         NFSCL_DEBUG(4, "cbgetattr\n");
3499                         mp = NULL;
3500                         vp = NULL;
3501                         error = nfsm_getfh(nd, &nfhp);
3502                         if (!error)
3503                                 error = nfsrv_getattrbits(nd, &attrbits,
3504                                     NULL, NULL);
3505                         if (error == 0 && i == 0 &&
3506                             minorvers != NFSV4_MINORVERSION)
3507                                 error = NFSERR_OPNOTINSESS;
3508                         if (!error) {
3509                                 mp = nfscl_getmnt(minorvers, sessionid, cbident,
3510                                     &clp);
3511                                 if (mp == NULL)
3512                                         error = NFSERR_SERVERFAULT;
3513                         }
3514                         if (!error) {
3515                                 error = nfscl_ngetreopen(mp, nfhp->nfh_fh,
3516                                     nfhp->nfh_len, p, &np);
3517                                 if (!error)
3518                                         vp = NFSTOV(np);
3519                         }
3520                         if (!error) {
3521                                 NFSZERO_ATTRBIT(&rattrbits);
3522                                 NFSLOCKCLSTATE();
3523                                 dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3524                                     nfhp->nfh_len);
3525                                 if (dp != NULL) {
3526                                         if (NFSISSET_ATTRBIT(&attrbits,
3527                                             NFSATTRBIT_SIZE)) {
3528                                                 if (vp != NULL)
3529                                                         va.va_size = np->n_size;
3530                                                 else
3531                                                         va.va_size =
3532                                                             dp->nfsdl_size;
3533                                                 NFSSETBIT_ATTRBIT(&rattrbits,
3534                                                     NFSATTRBIT_SIZE);
3535                                         }
3536                                         if (NFSISSET_ATTRBIT(&attrbits,
3537                                             NFSATTRBIT_CHANGE)) {
3538                                                 va.va_filerev =
3539                                                     dp->nfsdl_change;
3540                                                 if (vp == NULL ||
3541                                                     (np->n_flag & NDELEGMOD))
3542                                                         va.va_filerev++;
3543                                                 NFSSETBIT_ATTRBIT(&rattrbits,
3544                                                     NFSATTRBIT_CHANGE);
3545                                         }
3546                                 } else
3547                                         error = NFSERR_SERVERFAULT;
3548                                 NFSUNLOCKCLSTATE();
3549                         }
3550                         if (vp != NULL)
3551                                 vrele(vp);
3552                         if (mp != NULL)
3553                                 vfs_unbusy(mp);
3554                         if (nfhp != NULL)
3555                                 free(nfhp, M_NFSFH);
3556                         if (!error)
3557                                 (void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va,
3558                                     NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0,
3559                                     (uint64_t)0, NULL);
3560                         break;
3561                 case NFSV4OP_CBRECALL:
3562                         NFSCL_DEBUG(4, "cbrecall\n");
3563                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
3564                             NFSX_UNSIGNED);
3565                         stateid.seqid = *tl++;
3566                         NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other,
3567                             NFSX_STATEIDOTHER);
3568                         tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
3569                         trunc = fxdr_unsigned(int, *tl);
3570                         error = nfsm_getfh(nd, &nfhp);
3571                         if (error == 0 && i == 0 &&
3572                             minorvers != NFSV4_MINORVERSION)
3573                                 error = NFSERR_OPNOTINSESS;
3574                         if (!error) {
3575                                 NFSLOCKCLSTATE();
3576                                 if (minorvers == NFSV4_MINORVERSION)
3577                                         clp = nfscl_getclnt(cbident);
3578                                 else
3579                                         clp = nfscl_getclntsess(sessionid);
3580                                 if (clp != NULL) {
3581                                         dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3582                                             nfhp->nfh_len);
3583                                         if (dp != NULL && (dp->nfsdl_flags &
3584                                             NFSCLDL_DELEGRET) == 0) {
3585                                                 dp->nfsdl_flags |=
3586                                                     NFSCLDL_RECALL;
3587                                                 wakeup((caddr_t)clp);
3588                                         }
3589                                 } else {
3590                                         error = NFSERR_SERVERFAULT;
3591                                 }
3592                                 NFSUNLOCKCLSTATE();
3593                         }
3594                         if (nfhp != NULL)
3595                                 free(nfhp, M_NFSFH);
3596                         break;
3597                 case NFSV4OP_CBLAYOUTRECALL:
3598                         NFSCL_DEBUG(4, "cblayrec\n");
3599                         nfhp = NULL;
3600                         NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
3601                         laytype = fxdr_unsigned(int, *tl++);
3602                         iomode = fxdr_unsigned(uint32_t, *tl++);
3603                         if (newnfs_true == *tl++)
3604                                 changed = 1;
3605                         else
3606                                 changed = 0;
3607                         recalltype = fxdr_unsigned(int, *tl);
3608                         NFSCL_DEBUG(4, "layt=%d iom=%d ch=%d rectyp=%d\n",
3609                             laytype, iomode, changed, recalltype);
3610                         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL,
3611                             M_WAITOK);
3612                         if (laytype != NFSLAYOUT_NFSV4_1_FILES &&
3613                             laytype != NFSLAYOUT_FLEXFILE)
3614                                 error = NFSERR_NOMATCHLAYOUT;
3615                         else if (recalltype == NFSLAYOUTRETURN_FILE) {
3616                                 error = nfsm_getfh(nd, &nfhp);
3617                                 NFSCL_DEBUG(4, "retfile getfh=%d\n", error);
3618                                 if (error != 0)
3619                                         goto nfsmout;
3620                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER +
3621                                     NFSX_STATEID);
3622                                 off = fxdr_hyper(tl); tl += 2;
3623                                 len = fxdr_hyper(tl); tl += 2;
3624                                 stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
3625                                 NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
3626                                 if (minorvers == NFSV4_MINORVERSION)
3627                                         error = NFSERR_NOTSUPP;
3628                                 else if (i == 0)
3629                                         error = NFSERR_OPNOTINSESS;
3630                                 NFSCL_DEBUG(4, "off=%ju len=%ju sq=%u err=%d\n",
3631                                     (uintmax_t)off, (uintmax_t)len,
3632                                     stateid.seqid, error);
3633                                 if (error == 0) {
3634                                         NFSLOCKCLSTATE();
3635                                         clp = nfscl_getclntsess(sessionid);
3636                                         NFSCL_DEBUG(4, "cbly clp=%p\n", clp);
3637                                         if (clp != NULL) {
3638                                                 lyp = nfscl_findlayout(clp,
3639                                                     nfhp->nfh_fh,
3640                                                     nfhp->nfh_len);
3641                                                 NFSCL_DEBUG(4, "cblyp=%p\n",
3642                                                     lyp);
3643                                                 if (lyp != NULL &&
3644                                                     (lyp->nfsly_flags &
3645                                                      (NFSLY_FILES |
3646                                                       NFSLY_FLEXFILE)) != 0 &&
3647                                                     !NFSBCMP(stateid.other,
3648                                                     lyp->nfsly_stateid.other,
3649                                                     NFSX_STATEIDOTHER)) {
3650                                                         error =
3651                                                             nfscl_layoutrecall(
3652                                                             recalltype,
3653                                                             lyp, iomode, off,
3654                                                             len, stateid.seqid,
3655                                                             0, 0, NULL,
3656                                                             recallp);
3657                                                         if (error == 0 &&
3658                                                             stateid.seqid >
3659                                                             lyp->nfsly_stateid.seqid)
3660                                                                 lyp->nfsly_stateid.seqid =
3661                                                                     stateid.seqid;
3662                                                         recallp = NULL;
3663                                                         wakeup(clp);
3664                                                         NFSCL_DEBUG(4,
3665                                                             "aft layrcal=%d "
3666                                                             "layseqid=%d\n",
3667                                                             error,
3668                                                             lyp->nfsly_stateid.seqid);
3669                                                 } else
3670                                                         error =
3671                                                           NFSERR_NOMATCHLAYOUT;
3672                                         } else
3673                                                 error = NFSERR_NOMATCHLAYOUT;
3674                                         NFSUNLOCKCLSTATE();
3675                                 }
3676                                 free(nfhp, M_NFSFH);
3677                         } else if (recalltype == NFSLAYOUTRETURN_FSID) {
3678                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER);
3679                                 filesid[0] = fxdr_hyper(tl); tl += 2;
3680                                 filesid[1] = fxdr_hyper(tl); tl += 2;
3681                                 gotone = 0;
3682                                 NFSLOCKCLSTATE();
3683                                 clp = nfscl_getclntsess(sessionid);
3684                                 if (clp != NULL) {
3685                                         TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3686                                             nfsly_list) {
3687                                                 if (lyp->nfsly_filesid[0] ==
3688                                                     filesid[0] &&
3689                                                     lyp->nfsly_filesid[1] ==
3690                                                     filesid[1]) {
3691                                                         error =
3692                                                             nfscl_layoutrecall(
3693                                                             recalltype,
3694                                                             lyp, iomode, 0,
3695                                                             UINT64_MAX,
3696                                                             lyp->nfsly_stateid.seqid,
3697                                                             0, 0, NULL,
3698                                                             recallp);
3699                                                         recallp = NULL;
3700                                                         gotone = 1;
3701                                                 }
3702                                         }
3703                                         if (gotone != 0)
3704                                                 wakeup(clp);
3705                                         else
3706                                                 error = NFSERR_NOMATCHLAYOUT;
3707                                 } else
3708                                         error = NFSERR_NOMATCHLAYOUT;
3709                                 NFSUNLOCKCLSTATE();
3710                         } else if (recalltype == NFSLAYOUTRETURN_ALL) {
3711                                 gotone = 0;
3712                                 NFSLOCKCLSTATE();
3713                                 clp = nfscl_getclntsess(sessionid);
3714                                 if (clp != NULL) {
3715                                         TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3716                                             nfsly_list) {
3717                                                 error = nfscl_layoutrecall(
3718                                                     recalltype, lyp, iomode, 0,
3719                                                     UINT64_MAX,
3720                                                     lyp->nfsly_stateid.seqid,
3721                                                     0, 0, NULL, recallp);
3722                                                 recallp = NULL;
3723                                                 gotone = 1;
3724                                         }
3725                                         if (gotone != 0)
3726                                                 wakeup(clp);
3727                                         else
3728                                                 error = NFSERR_NOMATCHLAYOUT;
3729                                 } else
3730                                         error = NFSERR_NOMATCHLAYOUT;
3731                                 NFSUNLOCKCLSTATE();
3732                         } else
3733                                 error = NFSERR_NOMATCHLAYOUT;
3734                         if (recallp != NULL) {
3735                                 free(recallp, M_NFSLAYRECALL);
3736                                 recallp = NULL;
3737                         }
3738                         break;
3739                 case NFSV4OP_CBSEQUENCE:
3740                         NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3741                             5 * NFSX_UNSIGNED);
3742                         bcopy(tl, sessionid, NFSX_V4SESSIONID);
3743                         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3744                         seqid = fxdr_unsigned(uint32_t, *tl++);
3745                         slotid = fxdr_unsigned(uint32_t, *tl++);
3746                         highslot = fxdr_unsigned(uint32_t, *tl++);
3747                         cachethis = *tl++;
3748                         /* Throw away the referring call stuff. */
3749                         clist = fxdr_unsigned(int, *tl);
3750                         for (j = 0; j < clist; j++) {
3751                                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3752                                     NFSX_UNSIGNED);
3753                                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3754                                 rcalls = fxdr_unsigned(int, *tl);
3755                                 for (k = 0; k < rcalls; k++) {
3756                                         NFSM_DISSECT(tl, uint32_t *,
3757                                             2 * NFSX_UNSIGNED);
3758                                 }
3759                         }
3760                         NFSLOCKCLSTATE();
3761                         if (i == 0) {
3762                                 clp = nfscl_getclntsess(sessionid);
3763                                 if (clp == NULL)
3764                                         error = NFSERR_SERVERFAULT;
3765                         } else
3766                                 error = NFSERR_SEQUENCEPOS;
3767                         if (error == 0) {
3768                                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3769                                 error = nfsv4_seqsession(seqid, slotid,
3770                                     highslot, tsep->nfsess_cbslots, &rep,
3771                                     tsep->nfsess_backslots);
3772                         }
3773                         NFSUNLOCKCLSTATE();
3774                         if (error == 0 || error == NFSERR_REPLYFROMCACHE) {
3775                                 gotseq_ok = 1;
3776                                 if (rep != NULL) {
3777                                         /*
3778                                          * Handle a reply for a retried
3779                                          * callback.  The reply will be
3780                                          * re-inserted in the session cache
3781                                          * by the nfsv4_seqsess_cacherep() call
3782                                          * after out:
3783                                          */
3784                                         KASSERT(error == NFSERR_REPLYFROMCACHE,
3785                                             ("cbsequence: non-NULL rep"));
3786                                         NFSCL_DEBUG(4, "Got cbretry\n");
3787                                         m_freem(nd->nd_mreq);
3788                                         nd->nd_mreq = rep;
3789                                         rep = NULL;
3790                                         goto out;
3791                                 }
3792                                 NFSM_BUILD(tl, uint32_t *,
3793                                     NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED);
3794                                 bcopy(sessionid, tl, NFSX_V4SESSIONID);
3795                                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3796                                 *tl++ = txdr_unsigned(seqid);
3797                                 *tl++ = txdr_unsigned(slotid);
3798                                 *tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1);
3799                                 *tl = txdr_unsigned(NFSV4_CBSLOTS - 1);
3800                         }
3801                         break;
3802                 default:
3803                         if (i == 0 && minorvers != NFSV4_MINORVERSION)
3804                                 error = NFSERR_OPNOTINSESS;
3805                         else {
3806                                 NFSCL_DEBUG(1, "unsupp callback %d\n", op);
3807                                 error = NFSERR_NOTSUPP;
3808                         }
3809                         break;
3810                 }
3811                 if (error) {
3812                         if (error == EBADRPC || error == NFSERR_BADXDR) {
3813                                 nd->nd_repstat = NFSERR_BADXDR;
3814                         } else {
3815                                 nd->nd_repstat = error;
3816                         }
3817                         error = 0;
3818                 }
3819                 retops++;
3820                 if (nd->nd_repstat) {
3821                         *repp = nfscl_errmap(nd, minorvers);
3822                         break;
3823                 } else
3824                         *repp = 0;      /* NFS4_OK */
3825         }
3826 nfsmout:
3827         if (recallp != NULL)
3828                 free(recallp, M_NFSLAYRECALL);
3829         if (error) {
3830                 if (error == EBADRPC || error == NFSERR_BADXDR)
3831                         nd->nd_repstat = NFSERR_BADXDR;
3832                 else
3833                         printf("nfsv4 comperr1=%d\n", error);
3834         }
3835         if (taglen == -1) {
3836                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3837                 *tl++ = 0;
3838                 *tl = 0;
3839         } else {
3840                 *retopsp = txdr_unsigned(retops);
3841         }
3842         *nd->nd_errp = nfscl_errmap(nd, minorvers);
3843 out:
3844         if (gotseq_ok != 0) {
3845                 rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
3846                 NFSLOCKCLSTATE();
3847                 clp = nfscl_getclntsess(sessionid);
3848                 if (clp != NULL) {
3849                         tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3850                         nfsv4_seqsess_cacherep(slotid, tsep->nfsess_cbslots,
3851                             NFSERR_OK, &rep);
3852                         NFSUNLOCKCLSTATE();
3853                 } else {
3854                         NFSUNLOCKCLSTATE();
3855                         m_freem(rep);
3856                 }
3857         }
3858 }
3859
3860 /*
3861  * Generate the next cbident value. Basically just increment a static value
3862  * and then check that it isn't already in the list, if it has wrapped around.
3863  */
3864 static u_int32_t
3865 nfscl_nextcbident(void)
3866 {
3867         struct nfsclclient *clp;
3868         int matched;
3869         static u_int32_t nextcbident = 0;
3870         static int haswrapped = 0;
3871
3872         nextcbident++;
3873         if (nextcbident == 0)
3874                 haswrapped = 1;
3875         if (haswrapped) {
3876                 /*
3877                  * Search the clientid list for one already using this cbident.
3878                  */
3879                 do {
3880                         matched = 0;
3881                         NFSLOCKCLSTATE();
3882                         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3883                                 if (clp->nfsc_cbident == nextcbident) {
3884                                         matched = 1;
3885                                         break;
3886                                 }
3887                         }
3888                         NFSUNLOCKCLSTATE();
3889                         if (matched == 1)
3890                                 nextcbident++;
3891                 } while (matched);
3892         }
3893         return (nextcbident);
3894 }
3895
3896 /*
3897  * Get the mount point related to a given cbident or session and busy it.
3898  */
3899 static mount_t
3900 nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident,
3901     struct nfsclclient **clpp)
3902 {
3903         struct nfsclclient *clp;
3904         mount_t mp;
3905         int error;
3906         struct nfsclsession *tsep;
3907
3908         *clpp = NULL;
3909         NFSLOCKCLSTATE();
3910         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3911                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3912                 if (minorvers == NFSV4_MINORVERSION) {
3913                         if (clp->nfsc_cbident == cbident)
3914                                 break;
3915                 } else if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3916                     NFSX_V4SESSIONID))
3917                         break;
3918         }
3919         if (clp == NULL) {
3920                 NFSUNLOCKCLSTATE();
3921                 return (NULL);
3922         }
3923         mp = clp->nfsc_nmp->nm_mountp;
3924         vfs_ref(mp);
3925         NFSUNLOCKCLSTATE();
3926         error = vfs_busy(mp, 0);
3927         vfs_rel(mp);
3928         if (error != 0)
3929                 return (NULL);
3930         *clpp = clp;
3931         return (mp);
3932 }
3933
3934 /*
3935  * Get the clientid pointer related to a given cbident.
3936  */
3937 static struct nfsclclient *
3938 nfscl_getclnt(u_int32_t cbident)
3939 {
3940         struct nfsclclient *clp;
3941
3942         LIST_FOREACH(clp, &nfsclhead, nfsc_list)
3943                 if (clp->nfsc_cbident == cbident)
3944                         break;
3945         return (clp);
3946 }
3947
3948 /*
3949  * Get the clientid pointer related to a given sessionid.
3950  */
3951 static struct nfsclclient *
3952 nfscl_getclntsess(uint8_t *sessionid)
3953 {
3954         struct nfsclclient *clp;
3955         struct nfsclsession *tsep;
3956
3957         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3958                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3959                 if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3960                     NFSX_V4SESSIONID))
3961                         break;
3962         }
3963         return (clp);
3964 }
3965
3966 /*
3967  * Search for a lock conflict locally on the client. A conflict occurs if
3968  * - not same owner and overlapping byte range and at least one of them is
3969  *   a write lock or this is an unlock.
3970  */
3971 static int
3972 nfscl_localconflict(struct nfsclclient *clp, u_int8_t *fhp, int fhlen,
3973     struct nfscllock *nlop, u_int8_t *own, struct nfscldeleg *dp,
3974     struct nfscllock **lopp)
3975 {
3976         struct nfsclopen *op;
3977         int ret;
3978
3979         if (dp != NULL) {
3980                 ret = nfscl_checkconflict(&dp->nfsdl_lock, nlop, own, lopp);
3981                 if (ret)
3982                         return (ret);
3983         }
3984         LIST_FOREACH(op, NFSCLOPENHASH(clp, fhp, fhlen), nfso_hash) {
3985                 if (op->nfso_fhlen == fhlen &&
3986                     !NFSBCMP(op->nfso_fh, fhp, fhlen)) {
3987                         ret = nfscl_checkconflict(&op->nfso_lock, nlop,
3988                             own, lopp);
3989                         if (ret)
3990                                 return (ret);
3991                 }
3992         }
3993         return (0);
3994 }
3995
3996 static int
3997 nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop,
3998     u_int8_t *own, struct nfscllock **lopp)
3999 {
4000         struct nfscllockowner *lp;
4001         struct nfscllock *lop;
4002
4003         LIST_FOREACH(lp, lhp, nfsl_list) {
4004                 if (NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
4005                         LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
4006                                 if (lop->nfslo_first >= nlop->nfslo_end)
4007                                         break;
4008                                 if (lop->nfslo_end <= nlop->nfslo_first)
4009                                         continue;
4010                                 if (lop->nfslo_type == F_WRLCK ||
4011                                     nlop->nfslo_type == F_WRLCK ||
4012                                     nlop->nfslo_type == F_UNLCK) {
4013                                         if (lopp != NULL)
4014                                                 *lopp = lop;
4015                                         return (NFSERR_DENIED);
4016                                 }
4017                         }
4018                 }
4019         }
4020         return (0);
4021 }
4022
4023 /*
4024  * Check for a local conflicting lock.
4025  */
4026 int
4027 nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
4028     u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags)
4029 {
4030         struct nfscllock *lop, nlck;
4031         struct nfscldeleg *dp;
4032         struct nfsnode *np;
4033         u_int8_t own[NFSV4CL_LOCKNAMELEN];
4034         int error;
4035
4036         nlck.nfslo_type = fl->l_type;
4037         nlck.nfslo_first = off;
4038         if (len == NFS64BITSSET) {
4039                 nlck.nfslo_end = NFS64BITSSET;
4040         } else {
4041                 nlck.nfslo_end = off + len;
4042                 if (nlck.nfslo_end <= nlck.nfslo_first)
4043                         return (NFSERR_INVAL);
4044         }
4045         np = VTONFS(vp);
4046         nfscl_filllockowner(id, own, flags);
4047         NFSLOCKCLSTATE();
4048         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4049         error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
4050             &nlck, own, dp, &lop);
4051         if (error != 0) {
4052                 fl->l_whence = SEEK_SET;
4053                 fl->l_start = lop->nfslo_first;
4054                 if (lop->nfslo_end == NFS64BITSSET)
4055                         fl->l_len = 0;
4056                 else
4057                         fl->l_len = lop->nfslo_end - lop->nfslo_first;
4058                 fl->l_pid = (pid_t)0;
4059                 fl->l_type = lop->nfslo_type;
4060                 error = -1;                     /* no RPC required */
4061         } else if (dp != NULL && ((dp->nfsdl_flags & NFSCLDL_WRITE) ||
4062             fl->l_type == F_RDLCK)) {
4063                 /*
4064                  * The delegation ensures that there isn't a conflicting
4065                  * lock on the server, so return -1 to indicate an RPC
4066                  * isn't required.
4067                  */
4068                 fl->l_type = F_UNLCK;
4069                 error = -1;
4070         }
4071         NFSUNLOCKCLSTATE();
4072         return (error);
4073 }
4074
4075 /*
4076  * Handle Recall of a delegation.
4077  * The clp must be exclusive locked when this is called.
4078  */
4079 static int
4080 nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
4081     struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4082     int called_from_renewthread, vnode_t *vpp)
4083 {
4084         struct nfsclowner *owp, *lowp, *nowp;
4085         struct nfsclopen *op, *lop;
4086         struct nfscllockowner *lp;
4087         struct nfscllock *lckp;
4088         struct nfsnode *np;
4089         int error = 0, ret;
4090
4091         if (vp == NULL) {
4092                 KASSERT(vpp != NULL, ("nfscl_recalldeleg: vpp NULL"));
4093                 *vpp = NULL;
4094                 /*
4095                  * First, get a vnode for the file. This is needed to do RPCs.
4096                  */
4097                 ret = nfscl_ngetreopen(nmp->nm_mountp, dp->nfsdl_fh,
4098                     dp->nfsdl_fhlen, p, &np);
4099                 if (ret) {
4100                         /*
4101                          * File isn't open, so nothing to move over to the
4102                          * server.
4103                          */
4104                         return (0);
4105                 }
4106                 vp = NFSTOV(np);
4107                 *vpp = vp;
4108         } else {
4109                 np = VTONFS(vp);
4110         }
4111         dp->nfsdl_flags &= ~NFSCLDL_MODTIMESET;
4112
4113         /*
4114          * Ok, if it's a write delegation, flush data to the server, so
4115          * that close/open consistency is retained.
4116          */
4117         ret = 0;
4118         NFSLOCKNODE(np);
4119         if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) {
4120                 np->n_flag |= NDELEGRECALL;
4121                 NFSUNLOCKNODE(np);
4122                 ret = ncl_flush(vp, MNT_WAIT, p, 1, called_from_renewthread);
4123                 NFSLOCKNODE(np);
4124                 np->n_flag &= ~NDELEGRECALL;
4125         }
4126         NFSINVALATTRCACHE(np);
4127         NFSUNLOCKNODE(np);
4128         if (ret == EIO && called_from_renewthread != 0) {
4129                 /*
4130                  * If the flush failed with EIO for the renew thread,
4131                  * return now, so that the dirty buffer will be flushed
4132                  * later.
4133                  */
4134                 return (ret);
4135         }
4136
4137         /*
4138          * Now, for each openowner with opens issued locally, move them
4139          * over to state against the server.
4140          */
4141         LIST_FOREACH(lowp, &dp->nfsdl_owner, nfsow_list) {
4142                 lop = LIST_FIRST(&lowp->nfsow_open);
4143                 if (lop != NULL) {
4144                         if (LIST_NEXT(lop, nfso_list) != NULL)
4145                                 panic("nfsdlg mult opens");
4146                         /*
4147                          * Look for the same openowner against the server.
4148                          */
4149                         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
4150                                 if (!NFSBCMP(lowp->nfsow_owner,
4151                                     owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
4152                                         newnfs_copycred(&dp->nfsdl_cred, cred);
4153                                         ret = nfscl_moveopen(vp, clp, nmp, lop,
4154                                             owp, dp, cred, p);
4155                                         if (ret == NFSERR_STALECLIENTID ||
4156                                             ret == NFSERR_STALEDONTRECOVER ||
4157                                             ret == NFSERR_BADSESSION)
4158                                                 return (ret);
4159                                         if (ret) {
4160                                                 nfscl_freeopen(lop, 1);
4161                                                 if (!error)
4162                                                         error = ret;
4163                                         }
4164                                         break;
4165                                 }
4166                         }
4167
4168                         /*
4169                          * If no openowner found, create one and get an open
4170                          * for it.
4171                          */
4172                         if (owp == NULL) {
4173                                 nowp = malloc(
4174                                     sizeof (struct nfsclowner), M_NFSCLOWNER,
4175                                     M_WAITOK);
4176                                 nfscl_newopen(clp, NULL, &owp, &nowp, &op, 
4177                                     NULL, lowp->nfsow_owner, dp->nfsdl_fh,
4178                                     dp->nfsdl_fhlen, NULL, NULL);
4179                                 newnfs_copycred(&dp->nfsdl_cred, cred);
4180                                 ret = nfscl_moveopen(vp, clp, nmp, lop,
4181                                     owp, dp, cred, p);
4182                                 if (ret) {
4183                                         nfscl_freeopenowner(owp, 0);
4184                                         if (ret == NFSERR_STALECLIENTID ||
4185                                             ret == NFSERR_STALEDONTRECOVER ||
4186                                             ret == NFSERR_BADSESSION)
4187                                                 return (ret);
4188                                         if (ret) {
4189                                                 nfscl_freeopen(lop, 1);
4190                                                 if (!error)
4191                                                         error = ret;
4192                                         }
4193                                 }
4194                         }
4195                 }
4196         }
4197
4198         /*
4199          * Now, get byte range locks for any locks done locally.
4200          */
4201         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4202                 LIST_FOREACH(lckp, &lp->nfsl_lock, nfslo_list) {
4203                         newnfs_copycred(&dp->nfsdl_cred, cred);
4204                         ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p);
4205                         if (ret == NFSERR_STALESTATEID ||
4206                             ret == NFSERR_STALEDONTRECOVER ||
4207                             ret == NFSERR_STALECLIENTID ||
4208                             ret == NFSERR_BADSESSION)
4209                                 return (ret);
4210                         if (ret && !error)
4211                                 error = ret;
4212                 }
4213         }
4214         return (error);
4215 }
4216
4217 /*
4218  * Move a locally issued open over to an owner on the state list.
4219  * SIDE EFFECT: If it needs to sleep (do an rpc), it unlocks clstate and
4220  * returns with it unlocked.
4221  */
4222 static int
4223 nfscl_moveopen(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4224     struct nfsclopen *lop, struct nfsclowner *owp, struct nfscldeleg *dp,
4225     struct ucred *cred, NFSPROC_T *p)
4226 {
4227         struct nfsclopen *op, *nop;
4228         struct nfscldeleg *ndp;
4229         struct nfsnode *np;
4230         int error = 0, newone;
4231
4232         /*
4233          * First, look for an appropriate open, If found, just increment the
4234          * opencnt in it.
4235          */
4236         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
4237                 if ((op->nfso_mode & lop->nfso_mode) == lop->nfso_mode &&
4238                     op->nfso_fhlen == lop->nfso_fhlen &&
4239                     !NFSBCMP(op->nfso_fh, lop->nfso_fh, op->nfso_fhlen)) {
4240                         op->nfso_opencnt += lop->nfso_opencnt;
4241                         nfscl_freeopen(lop, 1);
4242                         return (0);
4243                 }
4244         }
4245
4246         /* No appropriate open, so we have to do one against the server. */
4247         np = VTONFS(vp);
4248         nop = malloc(sizeof (struct nfsclopen) +
4249             lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
4250         nop->nfso_hash.le_prev = NULL;
4251         newone = 0;
4252         nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner,
4253             lop->nfso_fh, lop->nfso_fhlen, cred, &newone);
4254         ndp = dp;
4255         error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen,
4256             lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op,
4257             NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &ndp, 0, 0, cred, p);
4258         if (error) {
4259                 if (newone)
4260                         nfscl_freeopen(op, 0);
4261         } else {
4262                 op->nfso_mode |= lop->nfso_mode;
4263                 op->nfso_opencnt += lop->nfso_opencnt;
4264                 nfscl_freeopen(lop, 1);
4265         }
4266         if (nop != NULL)
4267                 free(nop, M_NFSCLOPEN);
4268         if (ndp != NULL) {
4269                 /*
4270                  * What should I do with the returned delegation, since the
4271                  * delegation is being recalled? For now, just printf and
4272                  * through it away.
4273                  */
4274                 printf("Moveopen returned deleg\n");
4275                 free(ndp, M_NFSCLDELEG);
4276         }
4277         return (error);
4278 }
4279
4280 /*
4281  * Recall all delegations on this client.
4282  */
4283 static void
4284 nfscl_totalrecall(struct nfsclclient *clp)
4285 {
4286         struct nfscldeleg *dp;
4287
4288         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
4289                 if ((dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0)
4290                         dp->nfsdl_flags |= NFSCLDL_RECALL;
4291         }
4292 }
4293
4294 /*
4295  * Relock byte ranges. Called for delegation recall and state expiry.
4296  */
4297 static int
4298 nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4299     struct nfscllockowner *lp, struct nfscllock *lop, struct ucred *cred,
4300     NFSPROC_T *p)
4301 {
4302         struct nfscllockowner *nlp;
4303         struct nfsfh *nfhp;
4304         u_int64_t off, len;
4305         int error, newone, donelocally;
4306
4307         off = lop->nfslo_first;
4308         len = lop->nfslo_end - lop->nfslo_first;
4309         error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p,
4310             clp, 1, NULL, lp->nfsl_lockflags, lp->nfsl_owner,
4311             lp->nfsl_openowner, &nlp, &newone, &donelocally);
4312         if (error || donelocally)
4313                 return (error);
4314         nfhp = VTONFS(vp)->n_fhp;
4315         error = nfscl_trylock(nmp, vp, nfhp->nfh_fh,
4316             nfhp->nfh_len, nlp, newone, 0, off,
4317             len, lop->nfslo_type, cred, p);
4318         if (error)
4319                 nfscl_freelockowner(nlp, 0);
4320         return (error);
4321 }
4322
4323 /*
4324  * Called to re-open a file. Basically get a vnode for the file handle
4325  * and then call nfsrpc_openrpc() to do the rest.
4326  */
4327 static int
4328 nfsrpc_reopen(struct nfsmount *nmp, u_int8_t *fhp, int fhlen,
4329     u_int32_t mode, struct nfsclopen *op, struct nfscldeleg **dpp,
4330     struct ucred *cred, NFSPROC_T *p)
4331 {
4332         struct nfsnode *np;
4333         vnode_t vp;
4334         int error;
4335
4336         error = nfscl_ngetreopen(nmp->nm_mountp, fhp, fhlen, p, &np);
4337         if (error)
4338                 return (error);
4339         vp = NFSTOV(np);
4340         if (np->n_v4 != NULL) {
4341                 error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data,
4342                     np->n_v4->n4_fhlen, fhp, fhlen, mode, op,
4343                     NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, dpp, 0, 0,
4344                     cred, p);
4345         } else {
4346                 error = EINVAL;
4347         }
4348         vrele(vp);
4349         return (error);
4350 }
4351
4352 /*
4353  * Try an open against the server. Just call nfsrpc_openrpc(), retrying while
4354  * NFSERR_DELAY. Also, try system credentials, if the passed in credentials
4355  * fail.
4356  */
4357 static int
4358 nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
4359     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
4360     u_int8_t *name, int namelen, struct nfscldeleg **ndpp,
4361     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p)
4362 {
4363         int error;
4364
4365         do {
4366                 error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen,
4367                     mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p,
4368                     0, 0);
4369                 if (error == NFSERR_DELAY)
4370                         (void) nfs_catnap(PZERO, error, "nfstryop");
4371         } while (error == NFSERR_DELAY);
4372         if (error == EAUTH || error == EACCES) {
4373                 /* Try again using system credentials */
4374                 newnfs_setroot(cred);
4375                 do {
4376                     error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp,
4377                         newfhlen, mode, op, name, namelen, ndpp, reclaim,
4378                         delegtype, cred, p, 1, 0);
4379                     if (error == NFSERR_DELAY)
4380                         (void) nfs_catnap(PZERO, error, "nfstryop");
4381                 } while (error == NFSERR_DELAY);
4382         }
4383         return (error);
4384 }
4385
4386 /*
4387  * Try a byte range lock. Just loop on nfsrpc_lock() while it returns
4388  * NFSERR_DELAY. Also, retry with system credentials, if the provided
4389  * cred don't work.
4390  */
4391 static int
4392 nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp,
4393     int fhlen, struct nfscllockowner *nlp, int newone, int reclaim,
4394     u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p)
4395 {
4396         struct nfsrv_descript nfsd, *nd = &nfsd;
4397         int error;
4398
4399         do {
4400                 error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone,
4401                     reclaim, off, len, type, cred, p, 0);
4402                 if (!error && nd->nd_repstat == NFSERR_DELAY)
4403                         (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4404                             "nfstrylck");
4405         } while (!error && nd->nd_repstat == NFSERR_DELAY);
4406         if (!error)
4407                 error = nd->nd_repstat;
4408         if (error == EAUTH || error == EACCES) {
4409                 /* Try again using root credentials */
4410                 newnfs_setroot(cred);
4411                 do {
4412                         error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp,
4413                             newone, reclaim, off, len, type, cred, p, 1);
4414                         if (!error && nd->nd_repstat == NFSERR_DELAY)
4415                                 (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4416                                     "nfstrylck");
4417                 } while (!error && nd->nd_repstat == NFSERR_DELAY);
4418                 if (!error)
4419                         error = nd->nd_repstat;
4420         }
4421         return (error);
4422 }
4423
4424 /*
4425  * Try a delegreturn against the server. Just call nfsrpc_delegreturn(),
4426  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4427  * credentials fail.
4428  */
4429 static int
4430 nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred,
4431     struct nfsmount *nmp, NFSPROC_T *p)
4432 {
4433         int error;
4434
4435         do {
4436                 error = nfsrpc_delegreturn(dp, cred, nmp, p, 0);
4437                 if (error == NFSERR_DELAY)
4438                         (void) nfs_catnap(PZERO, error, "nfstrydp");
4439         } while (error == NFSERR_DELAY);
4440         if (error == EAUTH || error == EACCES) {
4441                 /* Try again using system credentials */
4442                 newnfs_setroot(cred);
4443                 do {
4444                         error = nfsrpc_delegreturn(dp, cred, nmp, p, 1);
4445                         if (error == NFSERR_DELAY)
4446                                 (void) nfs_catnap(PZERO, error, "nfstrydp");
4447                 } while (error == NFSERR_DELAY);
4448         }
4449         return (error);
4450 }
4451
4452 /*
4453  * Try a close against the server. Just call nfsrpc_closerpc(),
4454  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4455  * credentials fail.
4456  */
4457 int
4458 nfscl_tryclose(struct nfsclopen *op, struct ucred *cred,
4459     struct nfsmount *nmp, NFSPROC_T *p)
4460 {
4461         struct nfsrv_descript nfsd, *nd = &nfsd;
4462         int error;
4463
4464         do {
4465                 error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0);
4466                 if (error == NFSERR_DELAY)
4467                         (void) nfs_catnap(PZERO, error, "nfstrycl");
4468         } while (error == NFSERR_DELAY);
4469         if (error == EAUTH || error == EACCES) {
4470                 /* Try again using system credentials */
4471                 newnfs_setroot(cred);
4472                 do {
4473                         error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1);
4474                         if (error == NFSERR_DELAY)
4475                                 (void) nfs_catnap(PZERO, error, "nfstrycl");
4476                 } while (error == NFSERR_DELAY);
4477         }
4478         return (error);
4479 }
4480
4481 /*
4482  * Decide if a delegation on a file permits close without flushing writes
4483  * to the server. This might be a big performance win in some environments.
4484  * (Not useful until the client does caching on local stable storage.)
4485  */
4486 int
4487 nfscl_mustflush(vnode_t vp)
4488 {
4489         struct nfsclclient *clp;
4490         struct nfscldeleg *dp;
4491         struct nfsnode *np;
4492         struct nfsmount *nmp;
4493
4494         np = VTONFS(vp);
4495         nmp = VFSTONFS(vp->v_mount);
4496         if (!NFSHASNFSV4(nmp))
4497                 return (1);
4498         NFSLOCKCLSTATE();
4499         clp = nfscl_findcl(nmp);
4500         if (clp == NULL) {
4501                 NFSUNLOCKCLSTATE();
4502                 return (1);
4503         }
4504         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4505         if (dp != NULL && (dp->nfsdl_flags &
4506             (NFSCLDL_WRITE | NFSCLDL_RECALL | NFSCLDL_DELEGRET)) ==
4507              NFSCLDL_WRITE &&
4508             (dp->nfsdl_sizelimit >= np->n_size ||
4509              !NFSHASSTRICT3530(nmp))) {
4510                 NFSUNLOCKCLSTATE();
4511                 return (0);
4512         }
4513         NFSUNLOCKCLSTATE();
4514         return (1);
4515 }
4516
4517 /*
4518  * See if a (write) delegation exists for this file.
4519  */
4520 int
4521 nfscl_nodeleg(vnode_t vp, int writedeleg)
4522 {
4523         struct nfsclclient *clp;
4524         struct nfscldeleg *dp;
4525         struct nfsnode *np;
4526         struct nfsmount *nmp;
4527
4528         np = VTONFS(vp);
4529         nmp = VFSTONFS(vp->v_mount);
4530         if (!NFSHASNFSV4(nmp))
4531                 return (1);
4532         NFSLOCKMNT(nmp);
4533         if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
4534                 NFSUNLOCKMNT(nmp);
4535                 return (1);
4536         }
4537         NFSUNLOCKMNT(nmp);
4538         NFSLOCKCLSTATE();
4539         clp = nfscl_findcl(nmp);
4540         if (clp == NULL) {
4541                 NFSUNLOCKCLSTATE();
4542                 return (1);
4543         }
4544         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4545         if (dp != NULL &&
4546             (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0 &&
4547             (writedeleg == 0 || (dp->nfsdl_flags & NFSCLDL_WRITE) ==
4548              NFSCLDL_WRITE)) {
4549                 NFSUNLOCKCLSTATE();
4550                 return (0);
4551         }
4552         NFSUNLOCKCLSTATE();
4553         return (1);
4554 }
4555
4556 /*
4557  * Look for an associated delegation that should be DelegReturned.
4558  */
4559 int
4560 nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp)
4561 {
4562         struct nfsclclient *clp;
4563         struct nfscldeleg *dp;
4564         struct nfsclowner *owp;
4565         struct nfscllockowner *lp;
4566         struct nfsmount *nmp;
4567         struct ucred *cred;
4568         struct nfsnode *np;
4569         int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4570
4571         nmp = VFSTONFS(vp->v_mount);
4572         np = VTONFS(vp);
4573         NFSLOCKCLSTATE();
4574         /*
4575          * Loop around waiting for:
4576          * - outstanding I/O operations on delegations to complete
4577          * - for a delegation on vp that has state, lock the client and
4578          *   do a recall
4579          * - return delegation with no state
4580          */
4581         while (1) {
4582                 clp = nfscl_findcl(nmp);
4583                 if (clp == NULL) {
4584                         NFSUNLOCKCLSTATE();
4585                         return (retcnt);
4586                 }
4587                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4588                     np->n_fhp->nfh_len);
4589                 if (dp != NULL) {
4590                     /*
4591                      * Wait for outstanding I/O ops to be done.
4592                      */
4593                     if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4594                         if (igotlock) {
4595                             nfsv4_unlock(&clp->nfsc_lock, 0);
4596                             igotlock = 0;
4597                         }
4598                         dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4599                         (void) nfsmsleep(&dp->nfsdl_rwlock,
4600                             NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4601                         continue;
4602                     }
4603                     needsrecall = 0;
4604                     LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4605                         if (!LIST_EMPTY(&owp->nfsow_open)) {
4606                             needsrecall = 1;
4607                             break;
4608                         }
4609                     }
4610                     if (!needsrecall) {
4611                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4612                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4613                                 needsrecall = 1;
4614                                 break;
4615                             }
4616                         }
4617                     }
4618                     if (needsrecall && !triedrecall) {
4619                         dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4620                         islept = 0;
4621                         while (!igotlock) {
4622                             igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4623                                 &islept, NFSCLSTATEMUTEXPTR, NULL);
4624                             if (islept)
4625                                 break;
4626                         }
4627                         if (islept)
4628                             continue;
4629                         NFSUNLOCKCLSTATE();
4630                         cred = newnfs_getcred();
4631                         newnfs_copycred(&dp->nfsdl_cred, cred);
4632                         nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0, NULL);
4633                         NFSFREECRED(cred);
4634                         triedrecall = 1;
4635                         NFSLOCKCLSTATE();
4636                         nfsv4_unlock(&clp->nfsc_lock, 0);
4637                         igotlock = 0;
4638                         continue;
4639                     }
4640                     *stp = dp->nfsdl_stateid;
4641                     retcnt = 1;
4642                     nfscl_cleandeleg(dp);
4643                     nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4644                 }
4645                 if (igotlock)
4646                     nfsv4_unlock(&clp->nfsc_lock, 0);
4647                 NFSUNLOCKCLSTATE();
4648                 return (retcnt);
4649         }
4650 }
4651
4652 /*
4653  * Look for associated delegation(s) that should be DelegReturned.
4654  */
4655 int
4656 nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp,
4657     nfsv4stateid_t *tstp, int *gottdp, NFSPROC_T *p)
4658 {
4659         struct nfsclclient *clp;
4660         struct nfscldeleg *dp;
4661         struct nfsclowner *owp;
4662         struct nfscllockowner *lp;
4663         struct nfsmount *nmp;
4664         struct ucred *cred;
4665         struct nfsnode *np;
4666         int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4667
4668         nmp = VFSTONFS(fvp->v_mount);
4669         *gotfdp = 0;
4670         *gottdp = 0;
4671         NFSLOCKCLSTATE();
4672         /*
4673          * Loop around waiting for:
4674          * - outstanding I/O operations on delegations to complete
4675          * - for a delegation on fvp that has state, lock the client and
4676          *   do a recall
4677          * - return delegation(s) with no state.
4678          */
4679         while (1) {
4680                 clp = nfscl_findcl(nmp);
4681                 if (clp == NULL) {
4682                         NFSUNLOCKCLSTATE();
4683                         return (retcnt);
4684                 }
4685                 np = VTONFS(fvp);
4686                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4687                     np->n_fhp->nfh_len);
4688                 if (dp != NULL && *gotfdp == 0) {
4689                     /*
4690                      * Wait for outstanding I/O ops to be done.
4691                      */
4692                     if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4693                         if (igotlock) {
4694                             nfsv4_unlock(&clp->nfsc_lock, 0);
4695                             igotlock = 0;
4696                         }
4697                         dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4698                         (void) nfsmsleep(&dp->nfsdl_rwlock,
4699                             NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4700                         continue;
4701                     }
4702                     needsrecall = 0;
4703                     LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4704                         if (!LIST_EMPTY(&owp->nfsow_open)) {
4705                             needsrecall = 1;
4706                             break;
4707                         }
4708                     }
4709                     if (!needsrecall) {
4710                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4711                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4712                                 needsrecall = 1;
4713                                 break;
4714                             }
4715                         }
4716                     }
4717                     if (needsrecall && !triedrecall) {
4718                         dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4719                         islept = 0;
4720                         while (!igotlock) {
4721                             igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4722                                 &islept, NFSCLSTATEMUTEXPTR, NULL);
4723                             if (islept)
4724                                 break;
4725                         }
4726                         if (islept)
4727                             continue;
4728                         NFSUNLOCKCLSTATE();
4729                         cred = newnfs_getcred();
4730                         newnfs_copycred(&dp->nfsdl_cred, cred);
4731                         nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0, NULL);
4732                         NFSFREECRED(cred);
4733                         triedrecall = 1;
4734                         NFSLOCKCLSTATE();
4735                         nfsv4_unlock(&clp->nfsc_lock, 0);
4736                         igotlock = 0;
4737                         continue;
4738                     }
4739                     *fstp = dp->nfsdl_stateid;
4740                     retcnt++;
4741                     *gotfdp = 1;
4742                     nfscl_cleandeleg(dp);
4743                     nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4744                 }
4745                 if (igotlock) {
4746                     nfsv4_unlock(&clp->nfsc_lock, 0);
4747                     igotlock = 0;
4748                 }
4749                 if (tvp != NULL) {
4750                     np = VTONFS(tvp);
4751                     dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4752                         np->n_fhp->nfh_len);
4753                     if (dp != NULL && *gottdp == 0) {
4754                         /*
4755                          * Wait for outstanding I/O ops to be done.
4756                          */
4757                         if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4758                             dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4759                             (void) nfsmsleep(&dp->nfsdl_rwlock,
4760                                 NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4761                             continue;
4762                         }
4763                         LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4764                             if (!LIST_EMPTY(&owp->nfsow_open)) {
4765                                 NFSUNLOCKCLSTATE();
4766                                 return (retcnt);
4767                             }
4768                         }
4769                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4770                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4771                                 NFSUNLOCKCLSTATE();
4772                                 return (retcnt);
4773                             }
4774                         }
4775                         *tstp = dp->nfsdl_stateid;
4776                         retcnt++;
4777                         *gottdp = 1;
4778                         nfscl_cleandeleg(dp);
4779                         nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4780                     }
4781                 }
4782                 NFSUNLOCKCLSTATE();
4783                 return (retcnt);
4784         }
4785 }
4786
4787 /*
4788  * Get a reference on the clientid associated with the mount point.
4789  * Return 1 if success, 0 otherwise.
4790  */
4791 int
4792 nfscl_getref(struct nfsmount *nmp)
4793 {
4794         struct nfsclclient *clp;
4795
4796         NFSLOCKCLSTATE();
4797         clp = nfscl_findcl(nmp);
4798         if (clp == NULL) {
4799                 NFSUNLOCKCLSTATE();
4800                 return (0);
4801         }
4802         nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, NULL);
4803         NFSUNLOCKCLSTATE();
4804         return (1);
4805 }
4806
4807 /*
4808  * Release a reference on a clientid acquired with the above call.
4809  */
4810 void
4811 nfscl_relref(struct nfsmount *nmp)
4812 {
4813         struct nfsclclient *clp;
4814
4815         NFSLOCKCLSTATE();
4816         clp = nfscl_findcl(nmp);
4817         if (clp == NULL) {
4818                 NFSUNLOCKCLSTATE();
4819                 return;
4820         }
4821         nfsv4_relref(&clp->nfsc_lock);
4822         NFSUNLOCKCLSTATE();
4823 }
4824
4825 /*
4826  * Save the size attribute in the delegation, since the nfsnode
4827  * is going away.
4828  */
4829 void
4830 nfscl_reclaimnode(vnode_t vp)
4831 {
4832         struct nfsclclient *clp;
4833         struct nfscldeleg *dp;
4834         struct nfsnode *np = VTONFS(vp);
4835         struct nfsmount *nmp;
4836
4837         nmp = VFSTONFS(vp->v_mount);
4838         if (!NFSHASNFSV4(nmp))
4839                 return;
4840         NFSLOCKCLSTATE();
4841         clp = nfscl_findcl(nmp);
4842         if (clp == NULL) {
4843                 NFSUNLOCKCLSTATE();
4844                 return;
4845         }
4846         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4847         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4848                 dp->nfsdl_size = np->n_size;
4849         NFSUNLOCKCLSTATE();
4850 }
4851
4852 /*
4853  * Get the saved size attribute in the delegation, since it is a
4854  * newly allocated nfsnode.
4855  */
4856 void
4857 nfscl_newnode(vnode_t vp)
4858 {
4859         struct nfsclclient *clp;
4860         struct nfscldeleg *dp;
4861         struct nfsnode *np = VTONFS(vp);
4862         struct nfsmount *nmp;
4863
4864         nmp = VFSTONFS(vp->v_mount);
4865         if (!NFSHASNFSV4(nmp))
4866                 return;
4867         NFSLOCKCLSTATE();
4868         clp = nfscl_findcl(nmp);
4869         if (clp == NULL) {
4870                 NFSUNLOCKCLSTATE();
4871                 return;
4872         }
4873         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4874         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4875                 np->n_size = dp->nfsdl_size;
4876         NFSUNLOCKCLSTATE();
4877 }
4878
4879 /*
4880  * If there is a valid write delegation for this file, set the modtime
4881  * to the local clock time.
4882  */
4883 void
4884 nfscl_delegmodtime(vnode_t vp)
4885 {
4886         struct nfsclclient *clp;
4887         struct nfscldeleg *dp;
4888         struct nfsnode *np = VTONFS(vp);
4889         struct nfsmount *nmp;
4890
4891         nmp = VFSTONFS(vp->v_mount);
4892         if (!NFSHASNFSV4(nmp))
4893                 return;
4894         NFSLOCKMNT(nmp);
4895         if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
4896                 NFSUNLOCKMNT(nmp);
4897                 return;
4898         }
4899         NFSUNLOCKMNT(nmp);
4900         NFSLOCKCLSTATE();
4901         clp = nfscl_findcl(nmp);
4902         if (clp == NULL) {
4903                 NFSUNLOCKCLSTATE();
4904                 return;
4905         }
4906         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4907         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) {
4908                 nanotime(&dp->nfsdl_modtime);
4909                 dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
4910         }
4911         NFSUNLOCKCLSTATE();
4912 }
4913
4914 /*
4915  * If there is a valid write delegation for this file with a modtime set,
4916  * put that modtime in mtime.
4917  */
4918 void
4919 nfscl_deleggetmodtime(vnode_t vp, struct timespec *mtime)
4920 {
4921         struct nfsclclient *clp;
4922         struct nfscldeleg *dp;
4923         struct nfsnode *np = VTONFS(vp);
4924         struct nfsmount *nmp;
4925
4926         nmp = VFSTONFS(vp->v_mount);
4927         if (!NFSHASNFSV4(nmp))
4928                 return;
4929         NFSLOCKMNT(nmp);
4930         if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
4931                 NFSUNLOCKMNT(nmp);
4932                 return;
4933         }
4934         NFSUNLOCKMNT(nmp);
4935         NFSLOCKCLSTATE();
4936         clp = nfscl_findcl(nmp);
4937         if (clp == NULL) {
4938                 NFSUNLOCKCLSTATE();
4939                 return;
4940         }
4941         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4942         if (dp != NULL &&
4943             (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) ==
4944             (NFSCLDL_WRITE | NFSCLDL_MODTIMESET))
4945                 *mtime = dp->nfsdl_modtime;
4946         NFSUNLOCKCLSTATE();
4947 }
4948
4949 static int
4950 nfscl_errmap(struct nfsrv_descript *nd, u_int32_t minorvers)
4951 {
4952         short *defaulterrp, *errp;
4953
4954         if (!nd->nd_repstat)
4955                 return (0);
4956         if (nd->nd_procnum == NFSPROC_NOOP)
4957                 return (txdr_unsigned(nd->nd_repstat & 0xffff));
4958         if (nd->nd_repstat == EBADRPC)
4959                 return (txdr_unsigned(NFSERR_BADXDR));
4960         if (nd->nd_repstat == NFSERR_MINORVERMISMATCH ||
4961             nd->nd_repstat == NFSERR_OPILLEGAL)
4962                 return (txdr_unsigned(nd->nd_repstat));
4963         if (nd->nd_repstat >= NFSERR_BADIOMODE && nd->nd_repstat < 20000 &&
4964             minorvers > NFSV4_MINORVERSION) {
4965                 /* NFSv4.n error. */
4966                 return (txdr_unsigned(nd->nd_repstat));
4967         }
4968         if (nd->nd_procnum < NFSV4OP_CBNOPS)
4969                 errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum];
4970         else
4971                 return (txdr_unsigned(nd->nd_repstat));
4972         while (*++errp)
4973                 if (*errp == (short)nd->nd_repstat)
4974                         return (txdr_unsigned(nd->nd_repstat));
4975         return (txdr_unsigned(*defaulterrp));
4976 }
4977
/*
 * Called to find/add a layout to a client.
 * This function returns the layout with a refcnt (shared lock) upon
 * success (returns 0) or with no lock/refcnt on the layout when an
 * error is returned.
 * If a layout is passed in via lypp, it is locked (exclusively locked).
 *
 * When *lypp is NULL, the layout for fhp/fhlen is looked up and, if none
 * exists, a new one is initialized from stateidp, layouttype, retonclose
 * and the vnode's na_filesid and linked into the client's lists.  The
 * layout is then returned via *lypp.
 * When *lypp is non-NULL, only its stateid seqid is advanced (if the one
 * in stateidp is larger) and the lock on it is released via
 * nfsv4_unlock(..., 1) before returning.
 * In both cases the entries on fhlp are merged into the layout's read or
 * read/write list, chosen by the iomode of the first entry on fhlp.
 * EPERM is returned if nmp->nm_clp is NULL or if NFSCL_FORCEDISM() is true
 * after the reference was acquired.
 * The cred and p arguments are not referenced by this function.
 */
int
nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
    nfsv4stateid_t *stateidp, int layouttype, int retonclose,
    struct nfsclflayouthead *fhlp, struct nfscllayout **lypp,
    struct ucred *cred, NFSPROC_T *p)
{
        struct nfsclclient *clp;
        struct nfscllayout *lyp, *tlyp;
        struct nfsclflayout *flp;
        struct nfsnode *np = VTONFS(vp);
        mount_t mp;
        int layout_passed_in;

        mp = nmp->nm_mountp;
        layout_passed_in = 1;
        tlyp = NULL;
        lyp = *lypp;
        if (lyp == NULL) {
                layout_passed_in = 0;
                /*
                 * Allocate a candidate layout before the state lock is
                 * taken.  The size allows for fhlen bytes of file handle.
                 * (Presumably done here because an M_WAITOK allocation
                 * may sleep.)
                 */
                tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT,
                    M_WAITOK | M_ZERO);
        }

        NFSLOCKCLSTATE();
        clp = nmp->nm_clp;
        if (clp == NULL) {
                /* No client structure: undo the caller's lock and fail. */
                if (layout_passed_in != 0)
                        nfsv4_unlock(&lyp->nfsly_lock, 0);
                NFSUNLOCKCLSTATE();
                if (tlyp != NULL)
                        free(tlyp, M_NFSLAYOUT);
                return (EPERM);
        }
        if (lyp == NULL) {
                /*
                 * Although no lyp was passed in, another thread might have
                 * allocated one. If one is found, just increment its ref
                 * count and return it.
                 */
                lyp = nfscl_findlayout(clp, fhp, fhlen);
                if (lyp == NULL) {
                        /* Not found, so use the preallocated structure. */
                        lyp = tlyp;
                        tlyp = NULL;
                        lyp->nfsly_stateid.seqid = stateidp->seqid;
                        lyp->nfsly_stateid.other[0] = stateidp->other[0];
                        lyp->nfsly_stateid.other[1] = stateidp->other[1];
                        lyp->nfsly_stateid.other[2] = stateidp->other[2];
                        lyp->nfsly_lastbyte = 0;
                        LIST_INIT(&lyp->nfsly_flayread);
                        LIST_INIT(&lyp->nfsly_flayrw);
                        LIST_INIT(&lyp->nfsly_recall);
                        lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0];
                        lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1];
                        lyp->nfsly_clp = clp;
                        if (layouttype == NFSLAYOUT_FLEXFILE)
                                lyp->nfsly_flags = NFSLY_FLEXFILE;
                        else
                                lyp->nfsly_flags = NFSLY_FILES;
                        if (retonclose != 0)
                                lyp->nfsly_flags |= NFSLY_RETONCLOSE;
                        lyp->nfsly_fhlen = fhlen;
                        NFSBCOPY(fhp, lyp->nfsly_fh, fhlen);
                        /* Link it at the head of the list and hash chain. */
                        TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
                        LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp,
                            nfsly_hash);
                        /* NOTE(review): 120 is presumably seconds; confirm. */
                        lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
                        nfscl_layoutcnt++;
                } else {
                        /*
                         * Found an existing one.  Update it and move it to
                         * the head of the client's layout list.
                         * NOTE(review): the seqid test is a plain '>' and
                         * is not wraparound aware, unlike nfscl_seq().
                         */
                        if (retonclose != 0)
                                lyp->nfsly_flags |= NFSLY_RETONCLOSE;
                        if (stateidp->seqid > lyp->nfsly_stateid.seqid)
                                lyp->nfsly_stateid.seqid = stateidp->seqid;
                        TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
                        TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
                        lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
                }
                nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
                if (NFSCL_FORCEDISM(mp)) {
                        /*
                         * Fail without setting *lypp.  A newly created
                         * layout stays linked on the client's lists.
                         */
                        NFSUNLOCKCLSTATE();
                        if (tlyp != NULL)
                                free(tlyp, M_NFSLAYOUT);
                        return (EPERM);
                }
                *lypp = lyp;
        } else if (stateidp->seqid > lyp->nfsly_stateid.seqid)
                lyp->nfsly_stateid.seqid = stateidp->seqid;

        /* Merge the new list of File Layouts into the list. */
        flp = LIST_FIRST(fhlp);
        if (flp != NULL) {
                /* The iomode of the first entry selects the target list. */
                if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ)
                        nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp);
                else
                        nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp);
        }
        /*
         * NOTE(review): the second argument of 1 presumably leaves the
         * caller holding a reference in place of the exclusive lock, as
         * the comment at the top of the function implies; confirm against
         * nfsv4_unlock().
         */
        if (layout_passed_in != 0)
                nfsv4_unlock(&lyp->nfsly_lock, 1);
        NFSUNLOCKCLSTATE();
        /* The preallocated layout was not needed if it is still set. */
        if (tlyp != NULL)
                free(tlyp, M_NFSLAYOUT);
        return (0);
}
5087
5088 /*
5089  * Search for a layout by MDS file handle.
5090  * If one is found, it is returned with a refcnt (shared lock) iff
5091  * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is
5092  * returned NULL.
5093  */
5094 struct nfscllayout *
5095 nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen,
5096     uint64_t off, struct nfsclflayout **retflpp, int *recalledp)
5097 {
5098         struct nfscllayout *lyp;
5099         mount_t mp;
5100         int error, igotlock;
5101
5102         mp = clp->nfsc_nmp->nm_mountp;
5103         *recalledp = 0;
5104         *retflpp = NULL;
5105         NFSLOCKCLSTATE();
5106         lyp = nfscl_findlayout(clp, fhp, fhlen);
5107         if (lyp != NULL) {
5108                 if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5109                         TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
5110                         TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
5111                         lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
5112                         error = nfscl_findlayoutforio(lyp, off,
5113                             NFSV4OPEN_ACCESSREAD, retflpp);
5114                         if (error == 0)
5115                                 nfsv4_getref(&lyp->nfsly_lock, NULL,
5116                                     NFSCLSTATEMUTEXPTR, mp);
5117                         else {
5118                                 do {
5119                                         igotlock = nfsv4_lock(&lyp->nfsly_lock,
5120                                             1, NULL, NFSCLSTATEMUTEXPTR, mp);
5121                                 } while (igotlock == 0 && !NFSCL_FORCEDISM(mp));
5122                                 *retflpp = NULL;
5123                         }
5124                         if (NFSCL_FORCEDISM(mp)) {
5125                                 lyp = NULL;
5126                                 *recalledp = 1;
5127                         }
5128                 } else {
5129                         lyp = NULL;
5130                         *recalledp = 1;
5131                 }
5132         }
5133         NFSUNLOCKCLSTATE();
5134         return (lyp);
5135 }
5136
5137 /*
5138  * Search for a layout by MDS file handle. If one is found, mark in to be
5139  * recalled, if it already marked "return on close".
5140  */
5141 static void
5142 nfscl_retoncloselayout(vnode_t vp, struct nfsclclient *clp, uint8_t *fhp,
5143     int fhlen, struct nfsclrecalllayout **recallpp)
5144 {
5145         struct nfscllayout *lyp;
5146         uint32_t iomode;
5147
5148         if (vp->v_type != VREG || !NFSHASPNFS(VFSTONFS(vp->v_mount)) ||
5149             nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5150             (VTONFS(vp)->n_flag & NNOLAYOUT) != 0)
5151                 return;
5152         lyp = nfscl_findlayout(clp, fhp, fhlen);
5153         if (lyp != NULL && (lyp->nfsly_flags & (NFSLY_RETONCLOSE |
5154             NFSLY_RECALL)) == NFSLY_RETONCLOSE) {
5155                 iomode = 0;
5156                 if (!LIST_EMPTY(&lyp->nfsly_flayread))
5157                         iomode |= NFSLAYOUTIOMODE_READ;
5158                 if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5159                         iomode |= NFSLAYOUTIOMODE_RW;
5160                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5161                     0, UINT64_MAX, lyp->nfsly_stateid.seqid, 0, 0, NULL,
5162                     *recallpp);
5163                 NFSCL_DEBUG(4, "retoncls recall iomode=%d\n", iomode);
5164                 *recallpp = NULL;
5165         }
5166 }
5167
5168 /*
5169  * Mark the layout to be recalled and with an error.
5170  * Also, disable the dsp from further use.
5171  */
5172 void
5173 nfscl_dserr(uint32_t op, uint32_t stat, struct nfscldevinfo *dp,
5174     struct nfscllayout *lyp, struct nfsclds *dsp)
5175 {
5176         struct nfsclrecalllayout *recallp;
5177         uint32_t iomode;
5178
5179         printf("DS being disabled, error=%d\n", stat);
5180         /* Set up the return of the layout. */
5181         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
5182         iomode = 0;
5183         NFSLOCKCLSTATE();
5184         if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5185                 if (!LIST_EMPTY(&lyp->nfsly_flayread))
5186                         iomode |= NFSLAYOUTIOMODE_READ;
5187                 if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5188                         iomode |= NFSLAYOUTIOMODE_RW;
5189                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5190                     0, UINT64_MAX, lyp->nfsly_stateid.seqid, stat, op,
5191                     dp->nfsdi_deviceid, recallp);
5192                 NFSUNLOCKCLSTATE();
5193                 NFSCL_DEBUG(4, "nfscl_dserr recall iomode=%d\n", iomode);
5194         } else {
5195                 NFSUNLOCKCLSTATE();
5196                 free(recallp, M_NFSLAYRECALL);
5197         }
5198
5199         /* And shut the TCP connection down. */
5200         nfscl_cancelreqs(dsp);
5201 }
5202
5203 /*
5204  * Cancel all RPCs for this "dsp" by closing the connection.
5205  * Also, mark the session as defunct.
5206  * If NFSCLDS_SAMECONN is set, the connection is shared with other DSs and
5207  * cannot be shut down.
5208  */
5209 void
5210 nfscl_cancelreqs(struct nfsclds *dsp)
5211 {
5212         struct __rpc_client *cl;
5213         static int non_event;
5214
5215         NFSLOCKDS(dsp);
5216         if ((dsp->nfsclds_flags & (NFSCLDS_CLOSED | NFSCLDS_SAMECONN)) == 0 &&
5217             dsp->nfsclds_sockp != NULL &&
5218             dsp->nfsclds_sockp->nr_client != NULL) {
5219                 dsp->nfsclds_flags |= NFSCLDS_CLOSED;
5220                 cl = dsp->nfsclds_sockp->nr_client;
5221                 dsp->nfsclds_sess.nfsess_defunct = 1;
5222                 NFSUNLOCKDS(dsp);
5223                 CLNT_CLOSE(cl);
5224                 /*
5225                  * This 1sec sleep is done to reduce the number of reconnect
5226                  * attempts made on the DS while it has failed.
5227                  */
5228                 tsleep(&non_event, PVFS, "ndscls", hz);
5229                 return;
5230         }
5231         NFSUNLOCKDS(dsp);
5232 }
5233
5234 /*
5235  * Dereference a layout.
5236  */
5237 void
5238 nfscl_rellayout(struct nfscllayout *lyp, int exclocked)
5239 {
5240
5241         NFSLOCKCLSTATE();
5242         if (exclocked != 0)
5243                 nfsv4_unlock(&lyp->nfsly_lock, 0);
5244         else
5245                 nfsv4_relref(&lyp->nfsly_lock);
5246         NFSUNLOCKCLSTATE();
5247 }
5248
5249 /*
5250  * Search for a devinfo by deviceid. If one is found, return it after
5251  * acquiring a reference count on it.
5252  */
5253 struct nfscldevinfo *
5254 nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid,
5255     struct nfscldevinfo *dip)
5256 {
5257
5258         NFSLOCKCLSTATE();
5259         if (dip == NULL)
5260                 dip = nfscl_finddevinfo(clp, deviceid);
5261         if (dip != NULL)
5262                 dip->nfsdi_refcnt++;
5263         NFSUNLOCKCLSTATE();
5264         return (dip);
5265 }
5266
5267 /*
5268  * Dereference a devinfo structure.
5269  */
5270 static void
5271 nfscl_reldevinfo_locked(struct nfscldevinfo *dip)
5272 {
5273
5274         dip->nfsdi_refcnt--;
5275         if (dip->nfsdi_refcnt == 0)
5276                 wakeup(&dip->nfsdi_refcnt);
5277 }
5278
/*
 * Dereference a devinfo structure.
 * This is nfscl_reldevinfo_locked() bracketed by NFSLOCKCLSTATE() and
 * NFSUNLOCKCLSTATE().
 */
void
nfscl_reldevinfo(struct nfscldevinfo *dip)
{

        NFSLOCKCLSTATE();
        nfscl_reldevinfo_locked(dip);
        NFSUNLOCKCLSTATE();
}
5290
5291 /*
5292  * Find a layout for this file handle. Return NULL upon failure.
5293  */
5294 static struct nfscllayout *
5295 nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
5296 {
5297         struct nfscllayout *lyp;
5298
5299         LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash)
5300                 if (lyp->nfsly_fhlen == fhlen &&
5301                     !NFSBCMP(lyp->nfsly_fh, fhp, fhlen))
5302                         break;
5303         return (lyp);
5304 }
5305
5306 /*
5307  * Find a devinfo for this deviceid. Return NULL upon failure.
5308  */
5309 static struct nfscldevinfo *
5310 nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid)
5311 {
5312         struct nfscldevinfo *dip;
5313
5314         LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list)
5315                 if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID)
5316                     == 0)
5317                         break;
5318         return (dip);
5319 }
5320
/*
 * Merge the new file layout list into the main one, maintaining it in
 * increasing offset order.
 * fhlp is the list being merged into and newfhlp supplies the entries
 * to be added.
 * The position in fhlp is only ever moved forward, so the result is in
 * increasing nfsfl_off order only when both lists are already in that
 * order on entry.
 * NOTE(review): the entries are linked into fhlp without a LIST_REMOVE()
 * from newfhlp, so the head of newfhlp is not updated here.  Presumably
 * the callers do not walk newfhlp again after a merge -- confirm.
 */
static void
nfscl_mergeflayouts(struct nfsclflayouthead *fhlp,
    struct nfsclflayouthead *newfhlp)
{
        struct nfsclflayout *flp, *nflp, *prevflp, *tflp;

        flp = LIST_FIRST(fhlp);
        prevflp = NULL;
        /*
         * The _SAFE variant is required, since the linkage of nflp is
         * overwritten when it is inserted into fhlp.
         */
        LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) {
                /* Skip the entries that start below the new one. */
                while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) {
                        prevflp = flp;
                        flp = LIST_NEXT(flp, nfsfl_list);
                }
                /* Insert just after the last entry that was skipped. */
                if (prevflp == NULL)
                        LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list);
                else
                        LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list);
                /* The next new entry goes after this one. */
                prevflp = nflp;
        }
}
5345
5346 /*
5347  * Add this nfscldevinfo to the client, if it doesn't already exist.
5348  * This function consumes the structure pointed at by dip, if not NULL.
5349  */
5350 int
5351 nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip, int ind,
5352     struct nfsclflayout *flp)
5353 {
5354         struct nfsclclient *clp;
5355         struct nfscldevinfo *tdip;
5356         uint8_t *dev;
5357
5358         NFSLOCKCLSTATE();
5359         clp = nmp->nm_clp;
5360         if (clp == NULL) {
5361                 NFSUNLOCKCLSTATE();
5362                 if (dip != NULL)
5363                         free(dip, M_NFSDEVINFO);
5364                 return (ENODEV);
5365         }
5366         if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5367                 dev = flp->nfsfl_dev;
5368         else
5369                 dev = flp->nfsfl_ffm[ind].dev;
5370         tdip = nfscl_finddevinfo(clp, dev);
5371         if (tdip != NULL) {
5372                 tdip->nfsdi_layoutrefs++;
5373                 if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5374                         flp->nfsfl_devp = tdip;
5375                 else
5376                         flp->nfsfl_ffm[ind].devp = tdip;
5377                 nfscl_reldevinfo_locked(tdip);
5378                 NFSUNLOCKCLSTATE();
5379                 if (dip != NULL)
5380                         free(dip, M_NFSDEVINFO);
5381                 return (0);
5382         }
5383         if (dip != NULL) {
5384                 LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list);
5385                 dip->nfsdi_layoutrefs = 1;
5386                 if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5387                         flp->nfsfl_devp = dip;
5388                 else
5389                         flp->nfsfl_ffm[ind].devp = dip;
5390         }
5391         NFSUNLOCKCLSTATE();
5392         if (dip == NULL)
5393                 return (ENODEV);
5394         return (0);
5395 }
5396
5397 /*
5398  * Free up a layout structure and associated file layout structure(s).
5399  */
5400 void
5401 nfscl_freelayout(struct nfscllayout *layp)
5402 {
5403         struct nfsclflayout *flp, *nflp;
5404         struct nfsclrecalllayout *rp, *nrp;
5405
5406         LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) {
5407                 LIST_REMOVE(flp, nfsfl_list);
5408                 nfscl_freeflayout(flp);
5409         }
5410         LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) {
5411                 LIST_REMOVE(flp, nfsfl_list);
5412                 nfscl_freeflayout(flp);
5413         }
5414         LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) {
5415                 LIST_REMOVE(rp, nfsrecly_list);
5416                 free(rp, M_NFSLAYRECALL);
5417         }
5418         nfscl_layoutcnt--;
5419         free(layp, M_NFSLAYOUT);
5420 }
5421
5422 /*
5423  * Free up a file layout structure.
5424  */
5425 void
5426 nfscl_freeflayout(struct nfsclflayout *flp)
5427 {
5428         int i, j;
5429
5430         if ((flp->nfsfl_flags & NFSFL_FILE) != 0) {
5431                 for (i = 0; i < flp->nfsfl_fhcnt; i++)
5432                         free(flp->nfsfl_fh[i], M_NFSFH);
5433                 if (flp->nfsfl_devp != NULL)
5434                         flp->nfsfl_devp->nfsdi_layoutrefs--;
5435         }
5436         if ((flp->nfsfl_flags & NFSFL_FLEXFILE) != 0)
5437                 for (i = 0; i < flp->nfsfl_mirrorcnt; i++) {
5438                         for (j = 0; j < flp->nfsfl_ffm[i].fhcnt; j++)
5439                                 free(flp->nfsfl_ffm[i].fh[j], M_NFSFH);
5440                         if (flp->nfsfl_ffm[i].devp != NULL)     
5441                                 flp->nfsfl_ffm[i].devp->nfsdi_layoutrefs--;     
5442                 }
5443         free(flp, M_NFSFLAYOUT);
5444 }
5445
/*
 * Free up a file layout devinfo structure.
 * Only the structure itself is released, via free() with M_NFSDEVINFO.
 * It is not unlinked from any list and its counts are not checked here.
 */
void
nfscl_freedevinfo(struct nfscldevinfo *dip)
{

        free(dip, M_NFSDEVINFO);
}
5455
5456 /*
5457  * Mark any layouts that match as recalled.
5458  */
5459 static int
5460 nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode,
5461     uint64_t off, uint64_t len, uint32_t stateseqid, uint32_t stat, uint32_t op,
5462     char *devid, struct nfsclrecalllayout *recallp)
5463 {
5464         struct nfsclrecalllayout *rp, *orp;
5465
5466         recallp->nfsrecly_recalltype = recalltype;
5467         recallp->nfsrecly_iomode = iomode;
5468         recallp->nfsrecly_stateseqid = stateseqid;
5469         recallp->nfsrecly_off = off;
5470         recallp->nfsrecly_len = len;
5471         recallp->nfsrecly_stat = stat;
5472         recallp->nfsrecly_op = op;
5473         if (devid != NULL)
5474                 NFSBCOPY(devid, recallp->nfsrecly_devid, NFSX_V4DEVICEID);
5475         /*
5476          * Order the list as file returns first, followed by fsid and any
5477          * returns, both in increasing stateseqid order.
5478          * Note that the seqids wrap around, so 1 is after 0xffffffff.
5479          * (I'm not sure this is correct because I find RFC5661 confusing
5480          *  on this, but hopefully it will work ok.)
5481          */
5482         orp = NULL;
5483         LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5484                 orp = rp;
5485                 if ((recalltype == NFSLAYOUTRETURN_FILE &&
5486                      (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE ||
5487                       nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) ||
5488                     (recalltype != NFSLAYOUTRETURN_FILE &&
5489                      rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE &&
5490                      nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) {
5491                         LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list);
5492                         break;
5493                 }
5494
5495                 /*
5496                  * Put any error return on all the file returns that will
5497                  * preceed this one.
5498                  */
5499                 if (rp->nfsrecly_recalltype == NFSLAYOUTRETURN_FILE &&
5500                    stat != 0 && rp->nfsrecly_stat == 0) {
5501                         rp->nfsrecly_stat = stat;
5502                         rp->nfsrecly_op = op;
5503                         if (devid != NULL)
5504                                 NFSBCOPY(devid, rp->nfsrecly_devid,
5505                                     NFSX_V4DEVICEID);
5506                 }
5507         }
5508         if (rp == NULL) {
5509                 if (orp == NULL)
5510                         LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp,
5511                             nfsrecly_list);
5512                 else
5513                         LIST_INSERT_AFTER(orp, recallp, nfsrecly_list);
5514         }
5515         lyp->nfsly_flags |= NFSLY_RECALL;
5516         wakeup(lyp->nfsly_clp);
5517         return (0);
5518 }
5519
/*
 * Decide the ordering of two seqids, allowing for the fact that they can
 * wrap around from 0xffffffff->0.  When the two values are 0x7fffffff or
 * more apart, the numerically smaller one is taken to have wrapped around
 * and therefore to be the later of the two.
 * Return 1 if seqid1 comes before (or is equal to) seqid2, 0 otherwise.
 */
static int
nfscl_seq(uint32_t seqid1, uint32_t seqid2)
{
        uint32_t gap;

        if (seqid1 == seqid2)
                return (1);
        if (seqid1 < seqid2) {
                /* In order, unless seqid2 has wrapped around. */
                gap = seqid2 - seqid1;
                return (gap >= 0x7fffffff ? 0 : 1);
        }
        /* Out of order, unless seqid1 has wrapped around. */
        gap = seqid1 - seqid2;
        return (gap >= 0x7fffffff ? 1 : 0);
}
5540
5541 /*
5542  * Do a layout return for each of the recalls.
5543  */
5544 static void
5545 nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp,
5546     struct ucred *cred, NFSPROC_T *p)
5547 {
5548         struct nfsclrecalllayout *rp;
5549         nfsv4stateid_t stateid;
5550         int layouttype;
5551
5552         NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER);
5553         stateid.seqid = lyp->nfsly_stateid.seqid;
5554         if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5555                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5556         else
5557                 layouttype = NFSLAYOUT_FLEXFILE;
5558         LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5559                 (void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh,
5560                     lyp->nfsly_fhlen, 0, layouttype,
5561                     rp->nfsrecly_iomode, rp->nfsrecly_recalltype,
5562                     rp->nfsrecly_off, rp->nfsrecly_len,
5563                     &stateid, cred, p, rp->nfsrecly_stat, rp->nfsrecly_op,
5564                     rp->nfsrecly_devid);
5565         }
5566 }
5567
5568 /*
5569  * Do the layout commit for a file layout.
5570  */
5571 static void
5572 nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp,
5573     struct ucred *cred, NFSPROC_T *p)
5574 {
5575         struct nfsclflayout *flp;
5576         uint64_t len;
5577         int error, layouttype;
5578
5579         if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5580                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5581         else
5582                 layouttype = NFSLAYOUT_FLEXFILE;
5583         LIST_FOREACH(flp, &lyp->nfsly_flayrw, nfsfl_list) {
5584                 if (layouttype == NFSLAYOUT_FLEXFILE &&
5585                     (flp->nfsfl_fflags & NFSFLEXFLAG_NO_LAYOUTCOMMIT) != 0) {
5586                         NFSCL_DEBUG(4, "Flex file: no layoutcommit\n");
5587                         /* If not supported, don't bother doing it. */
5588                         NFSLOCKMNT(nmp);
5589                         nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5590                         NFSUNLOCKMNT(nmp);
5591                         break;
5592                 } else if (flp->nfsfl_off <= lyp->nfsly_lastbyte) {
5593                         len = flp->nfsfl_end - flp->nfsfl_off;
5594                         error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh,
5595                             lyp->nfsly_fhlen, 0, flp->nfsfl_off, len,
5596                             lyp->nfsly_lastbyte, &lyp->nfsly_stateid,
5597                             layouttype, cred, p, NULL);
5598                         NFSCL_DEBUG(4, "layoutcommit err=%d\n", error);
5599                         if (error == NFSERR_NOTSUPP) {
5600                                 /* If not supported, don't bother doing it. */
5601                                 NFSLOCKMNT(nmp);
5602                                 nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5603                                 NFSUNLOCKMNT(nmp);
5604                                 break;
5605                         }
5606                 }
5607         }
5608 }
5609
5610 /*
5611  * Commit all layouts for a file (vnode).
5612  */
5613 int
5614 nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p)
5615 {
5616         struct nfsclclient *clp;
5617         struct nfscllayout *lyp;
5618         struct nfsnode *np = VTONFS(vp);
5619         mount_t mp;
5620         struct nfsmount *nmp;
5621
5622         mp = vp->v_mount;
5623         nmp = VFSTONFS(mp);
5624         if (NFSHASNOLAYOUTCOMMIT(nmp))
5625                 return (0);
5626         NFSLOCKCLSTATE();
5627         clp = nmp->nm_clp;
5628         if (clp == NULL) {
5629                 NFSUNLOCKCLSTATE();
5630                 return (EPERM);
5631         }
5632         lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
5633         if (lyp == NULL) {
5634                 NFSUNLOCKCLSTATE();
5635                 return (EPERM);
5636         }
5637         nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
5638         if (NFSCL_FORCEDISM(mp)) {
5639                 NFSUNLOCKCLSTATE();
5640                 return (EPERM);
5641         }
5642 tryagain:
5643         if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
5644                 lyp->nfsly_flags &= ~NFSLY_WRITTEN;
5645                 NFSUNLOCKCLSTATE();
5646                 NFSCL_DEBUG(4, "do layoutcommit2\n");
5647                 nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p);
5648                 NFSLOCKCLSTATE();
5649                 goto tryagain;
5650         }
5651         nfsv4_relref(&lyp->nfsly_lock);
5652         NFSUNLOCKCLSTATE();
5653         return (0);
5654 }