]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/nfsclient/nfs_clstate.c
zfs: merge openzfs/zfs@4a1195ca5 (master) into main
[FreeBSD/FreeBSD.git] / sys / fs / nfsclient / nfs_clstate.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2009 Rick Macklem, University of Guelph
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 /*
34  * These functions implement the client side state handling for NFSv4.
35  * NFSv4 state handling:
36  * - A lockowner is used to determine lock contention, so it
37  *   corresponds directly to a Posix pid. (1 to 1 mapping)
38  * - The correct granularity of an OpenOwner is not nearly so
39  *   obvious. An OpenOwner does the following:
40  *   - provides a serial sequencing of Open/Close/Lock-with-new-lockowner
41  *   - is used to check for Open/Share contention (not applicable to
42  *     this client, since all Opens are Deny_None)
43  *   As such, I considered both extremes.
44  *   1 OpenOwner per ClientID - Simple to manage, but fully serializes
45  *   all Open, Close and Lock (with a new lockowner) Ops.
46  *   1 OpenOwner for each Open - This one results in an OpenConfirm for
47  *   every Open, for most servers.
48  *   So, I chose to use the same mapping as I did for LockOwners.
49  *   The main concern here is that you can end up with multiple Opens
50  *   for the same File Handle, but on different OpenOwners (opens
51  *   inherited from parents, grandparents...) and you do not know
52  *   which of these the vnodeop close applies to. This is handled by
53  *   delaying the Close Op(s) until all of the Opens have been closed.
54  *   (It is not yet obvious if this is the correct granularity.)
55  * - How the code handles serialization:
56  *   - For the ClientId, it uses an exclusive lock while getting its
57  *     SetClientId and during recovery. Otherwise, it uses a shared
58  *     lock via a reference count.
59  *   - For the rest of the data structures, it uses an SMP mutex
60  *     (once the nfs client is SMP safe) and doesn't sleep while
61  *     manipulating the linked lists.
62  *   - The serialization of Open/Close/Lock/LockU falls out in the
63  *     "wash", since OpenOwners and LockOwners are both mapped from
64  *     Posix pid. In other words, there is only one Posix pid using
65  *     any given owner, so that owner is serialized. (If you change
66  *     the granularity of the OpenOwner, then code must be added to
67  *     serialize Ops on the OpenOwner.)
68  * - When to get rid of OpenOwners and LockOwners.
69  *   - The function nfscl_cleanup_common() is executed after a process exits.
70  *     It goes through the client list looking for all Open and Lock Owners.
71  *     When one is found, it is marked "defunct" or in the case of
72  *     an OpenOwner without any Opens, freed.
73  *     The renew thread scans for defunct Owners and gets rid of them,
74  *     if it can. The LockOwners will also be deleted when the
75  *     associated Open is closed.
76  *   - If the LockU or Close Op(s) fail during close in a way
77  *     that could be recovered upon retry, they are relinked to the
78  *     ClientId's defunct open list and retried by the renew thread
79  *     until they succeed or an unmount/recovery occurs.
80  *     (Since we are done with them, they do not need to be recovered.)
81  */
82
83 #include <fs/nfs/nfsport.h>
84
85 /*
86  * Global variables
    *
    * The extern declarations name objects that are defined outside this
    * file; everything else below is defined here.
87  */
88 extern struct nfsstatsv1 nfsstatsv1;
89 extern struct nfsreqhead nfsd_reqq;
90 extern u_int32_t newnfs_false, newnfs_true;
91 extern int nfscl_debuglevel;
92 extern int nfscl_enablecallb;
93 extern int nfs_numnfscbd;
   /*
    * NOTE(review): these two macros presumably declare the request and
    * client-state lock objects (the latter being what NFSLOCKCLSTATE()
    * and NFSCLSTATEMUTEXPTR refer to); their definitions are not visible
    * here - confirm.
    */
94 NFSREQSPINLOCK;
95 NFSCLSTATEMUTEX;
96 int nfscl_inited = 0;
97 struct nfsclhead nfsclhead;     /* Head of clientid list */
98 int nfscl_deleghighwater = NFSCLDELEGHIGHWATER;
99 int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER;
100
101 static int nfscl_delegcnt = 0;  /* bumped by nfscl_deleg() for each delegation it links in */
102 static int nfscl_layoutcnt = 0;
/*
 * Forward declarations for the file-local (static) helper functions, so
 * that they can be called before their definitions appear.
 */
103 static int nfscl_getopen(struct nfsclownerhead *, struct nfsclopenhash *,
104     u_int8_t *, int, u_int8_t *, u_int8_t *, u_int32_t,
105     struct nfscllockowner **, struct nfsclopen **);
106 static bool nfscl_checkown(struct nfsclowner *, struct nfsclopen *, uint8_t *,
107     uint8_t *, struct nfscllockowner **, struct nfsclopen **,
108     struct nfsclopen **);
109 static void nfscl_clrelease(struct nfsclclient *);
110 static void nfscl_cleanclient(struct nfsclclient *);
111 static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *,
112     struct ucred *, NFSPROC_T *);
113 static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *,
114     struct nfsmount *, struct ucred *, NFSPROC_T *);
115 static void nfscl_recover(struct nfsclclient *, bool *, struct ucred *,
116     NFSPROC_T *);
117 static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *,
118     struct nfscllock *, int);
119 static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **,
120     struct nfscllock **, int);
121 static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *);
122 static u_int32_t nfscl_nextcbident(void);
123 static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **);
124 static struct nfsclclient *nfscl_getclnt(u_int32_t);
125 static struct nfsclclient *nfscl_getclntsess(uint8_t *);
126 static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *,
127     int);
128 static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *,
129     int, struct nfsclrecalllayout **);
130 static void nfscl_reldevinfo_locked(struct nfscldevinfo *);
131 static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *,
132     int);
133 static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *);
134 static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *,
135     u_int8_t *, struct nfscllock **);
136 static void nfscl_freealllocks(struct nfscllockownerhead *, int);
137 static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int,
138     struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **);
139 static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *,
140     struct nfsclowner **, struct nfsclowner **, struct nfsclopen **,
141     struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *);
142 static int nfscl_moveopen(vnode_t , struct nfsclclient *,
143     struct nfsmount *, struct nfsclopen *, struct nfsclowner *,
144     struct nfscldeleg *, struct ucred *, NFSPROC_T *);
145 static void nfscl_totalrecall(struct nfsclclient *);
146 static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *,
147     struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *);
148 static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int,
149     u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int,
150     struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *);
151 static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *,
152     int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short,
153     struct ucred *, NFSPROC_T *);
154 static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t,
155     struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *);
156 static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *,
157     bool);
158 static int nfscl_errmap(struct nfsrv_descript *, u_int32_t);
159 static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *);
160 static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *,
161     struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int,
162     vnode_t *);
163 static void nfscl_freeopenowner(struct nfsclowner *, int);
164 static void nfscl_cleandeleg(struct nfscldeleg *);
165 static void nfscl_emptylockowner(struct nfscllockowner *,
166     struct nfscllockownerfhhead *);
167 static void nfscl_mergeflayouts(struct nfsclflayouthead *,
168     struct nfsclflayouthead *);
169 static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t,
170     uint64_t, uint32_t, uint32_t, uint32_t, char *, struct nfsclrecalllayout *);
171 static int nfscl_seq(uint32_t, uint32_t);
172 static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *,
173     struct ucred *, NFSPROC_T *);
174 static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *,
175     struct ucred *, NFSPROC_T *);
176
/*
 * Error tables for the callback side.  Each nfscberr_* array is a list of
 * NFSERR_* values terminated by 0, and nfscl_cberrmap[] selects one of
 * those arrays by index: entries 0-2 use the "null" table, entry 3 the
 * getattr table and entry 4 the recall table.
 * NOTE(review): the index is presumably the callback operation number and
 * the tables are presumably consulted by nfscl_errmap(); neither is
 * visible in this part of the file - confirm against that function.
 */
177 static short nfscberr_null[] = {
178         0,
179         0,
180 };
181
182 static short nfscberr_getattr[] = {
183         NFSERR_RESOURCE,
184         NFSERR_BADHANDLE,
185         NFSERR_BADXDR,
186         NFSERR_RESOURCE,
187         NFSERR_SERVERFAULT,
188         0,
189 };
190
191 static short nfscberr_recall[] = {
192         NFSERR_RESOURCE,
193         NFSERR_BADHANDLE,
194         NFSERR_BADSTATEID,
195         NFSERR_BADXDR,
196         NFSERR_RESOURCE,
197         NFSERR_SERVERFAULT,
198         0,
199 };
200
201 static short *nfscl_cberrmap[] = {
202         nfscberr_null,
203         nfscberr_null,
204         nfscberr_null,
205         nfscberr_getattr,
206         nfscberr_recall
207 };
208
/*
 * Address family for a client structure: AF_INET6 when NFSCLFLAGS_AFINET6
 * is set in clp->nfsc_flags, otherwise AF_INET.
 */
209 #define NETFAMILY(clp) \
210                 (((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) ? AF_INET6 : AF_INET)
211
212 /*
213  * Called for an open operation.
214  * If the nfhp argument is NULL, just get an openowner.
     *
     * Finds, or creates from storage preallocated here, the openowner for
     * the caller and, when nfhp is not NULL, the open for that file handle.
     *
     * vp       - vnode being opened; only vp->v_mount is used here.
     * nfhp     - file handle, or NULL to get just an openowner.
     * fhlen    - length of nfhp in bytes.
     * amode    - access bits requested; any bits missing from an existing
     *            open's nfso_mode are or'd into it.
     * usedeleg - if non-zero, look for a usable delegation for nfhp and,
     *            when one is found, use the delegation's own openowner list.
     * cred, p  - passed to nfscl_getcl(); p->td_proc also names the owner
     *            unless the mount uses a single openowner.
     * owpp     - if not NULL, set to the openowner (NULL on error).
     * opp      - if not NULL, set to the open (NULL if none, or on error).
     * newonep  - if not NULL, set to 1 when a new openowner or open was
     *            linked in, else 0.
     * retp     - if not NULL, set to NFSCLOPEN_OK, NFSCLOPEN_SETCRED or
     *            NFSCLOPEN_DOOPEN (see the comments in the body).
     * lockit   - if non-zero, the openowner's nfsow_rwlock is acquired
     *            before returning.
     *
     * Returns 0, or the error returned by nfscl_getcl().
215  */
216 int
217 nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
218     struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp,
219     struct nfsclopen **opp, int *newonep, int *retp, int lockit)
220 {
221         struct nfsclclient *clp;
222         struct nfsclowner *owp, *nowp;
223         struct nfsclopen *op = NULL, *nop = NULL;
224         struct nfscldeleg *dp;
225         struct nfsclownerhead *ohp;
226         u_int8_t own[NFSV4CL_LOCKNAMELEN];
227         int ret;
228
            /* Give the optional output arguments defined values up front. */
229         if (newonep != NULL)
230                 *newonep = 0;
231         if (opp != NULL)
232                 *opp = NULL;
233         if (owpp != NULL)
234                 *owpp = NULL;
235
236         /*
237          * Might need one or both of these, so MALLOC them now, to
238          * avoid a tsleep() in MALLOC later.
239          */
240         nowp = malloc(sizeof (struct nfsclowner),
241             M_NFSCLOWNER, M_WAITOK);
242         if (nfhp != NULL) {
243             nop = malloc(sizeof (struct nfsclopen) +
244                 fhlen - 1, M_NFSCLOPEN, M_WAITOK);
                /*
                 * NOTE(review): presumably marks the open as not yet being
                 * on an open hash list - confirm against the code that
                 * tests nfso_hash.le_prev.
                 */
245             nop->nfso_hash.le_prev = NULL;
246         }
247         ret = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
248         if (ret != 0) {
                    /* No client: release the preallocated structures. */
249                 free(nowp, M_NFSCLOWNER);
250                 if (nop != NULL)
251                         free(nop, M_NFSCLOPEN);
252                 return (ret);
253         }
254
255         /*
256          * Get the Open iff it already exists.
257          * If none found, nfscl_newopen() below links in the one that
258          * was preallocated above.
259          */
260         NFSLOCKCLSTATE();
261         dp = NULL;
262         /* First check the delegation list */
263         if (nfhp != NULL && usedeleg) {
264                 LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
265                         if (dp->nfsdl_fhlen == fhlen &&
266                             !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
                                    /*
                                     * A matching delegation is only used if
                                     * write access is not requested or it is
                                     * a write delegation; otherwise proceed
                                     * as if there were none.
                                     */
267                                 if (!(amode & NFSV4OPEN_ACCESSWRITE) ||
268                                     (dp->nfsdl_flags & NFSCLDL_WRITE))
269                                         break;
270                                 dp = NULL;
271                                 break;
272                         }
273                 }
274         }
275
276         /* For NFSv4.1/4.2 and this option, use a single open_owner. */
277         if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
278                 nfscl_filllockowner(NULL, own, F_POSIX);
279         else
280                 nfscl_filllockowner(p->td_proc, own, F_POSIX);
            /* Openowners for a delegation live on the delegation's own list. */
281         if (dp != NULL)
282                 ohp = &dp->nfsdl_owner;
283         else
284                 ohp = &clp->nfsc_owner;
285         /* Now, search for an openowner */
286         LIST_FOREACH(owp, ohp, nfsow_list) {
287                 if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN))
288                         break;
289         }
290
291         /*
292          * Create a new open, as required.
             * nfscl_newopen() sets nowp and/or nop to NULL when it links
             * the corresponding preallocated structure into the lists.
293          */
294         nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen,
295             cred, newonep);
296
297         /*
298          * Now, check the mode on the open and return the appropriate
299          * value.
             * nop == NULL with nfhp != NULL means the preallocated open was
             * consumed, i.e. a new open was just created.
300          */
301         if (retp != NULL) {
302                 if (nfhp != NULL && dp != NULL && nop == NULL)
303                         /* new local open on delegation */
304                         *retp = NFSCLOPEN_SETCRED;
305                 else
306                         *retp = NFSCLOPEN_OK;
307         }
            /*
             * If the request needs access bits the open does not have yet,
             * record them and, when there is no delegation, report
             * NFSCLOPEN_DOOPEN.
             */
308         if (op != NULL && (amode & ~(op->nfso_mode))) {
309                 op->nfso_mode |= amode;
310                 if (retp != NULL && dp == NULL)
311                         *retp = NFSCLOPEN_DOOPEN;
312         }
313
314         /*
315          * Serialize modifications to the open owner for multiple threads
316          * within the same process using a read/write sleep lock.
317          * For NFSv4.1 and a single OpenOwner, allow concurrent open operations
318          * by acquiring a shared lock.  The close operations still use an
319          * exclusive lock for this case.
320          */
321         if (lockit != 0) {
322                 if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount))) {
323                         /*
324                          * Get a shared lock on the OpenOwner, but first
325                          * wait for any pending exclusive lock, so that the
326                          * exclusive locker gets priority.
327                          */
328                         nfsv4_lock(&owp->nfsow_rwlock, 0, NULL,
329                             NFSCLSTATEMUTEXPTR, NULL);
330                         nfsv4_getref(&owp->nfsow_rwlock, NULL,
331                             NFSCLSTATEMUTEXPTR, NULL);
332                 } else
333                         nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
334         }
335         NFSUNLOCKCLSTATE();
            /* Free whichever preallocated structures were not linked in. */
336         if (nowp != NULL)
337                 free(nowp, M_NFSCLOWNER);
338         if (nop != NULL)
339                 free(nop, M_NFSCLOPEN);
340         if (owpp != NULL)
341                 *owpp = owp;
342         if (opp != NULL)
343                 *opp = op;
344         return (0);
345 }
346
347 /*
348  * Create a new open, as required.
     *
     * Nothing is allocated or locked here: the caller supplies preallocated
     * structures through nowpp/nopp, and nfscl_open() calls this with
     * NFSLOCKCLSTATE() held.
     *
     * - If *owpp is NULL and a preallocated openowner is supplied, it is
     *   initialized with the owner name "own" and inserted at the head of
     *   dp->nfsdl_owner (dp != NULL) or clp->nfsc_owner (dp == NULL).
     *   *owpp is set to it and *nowpp is set to NULL.
     * - If fhp is not NULL, the openowner's open list is searched for an
     *   open with the same file handle.  If there is none and a
     *   preallocated open is supplied, it is initialized and linked in,
     *   *opp is set to it and *nopp is set to NULL.  Otherwise *opp is set
     *   to the open that was found (possibly NULL).
     * - *newonep, if newonep is not NULL, is set to 1 whenever an
     *   openowner or open is linked in.
     *
     * NOTE(review): when fhp != NULL the code dereferences owp and opp
     * unconditionally, so callers must make sure an openowner exists or is
     * supplied, and that opp is not NULL.
349  */
350 static void
351 nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp,
352     struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp,
353     struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen,
354     struct ucred *cred, int *newonep)
355 {
356         struct nfsclowner *owp = *owpp, *nowp;
357         struct nfsclopen *op, *nop;
358
            /* Both preallocated structures are optional. */
359         if (nowpp != NULL)
360                 nowp = *nowpp;
361         else
362                 nowp = NULL;
363         if (nopp != NULL)
364                 nop = *nopp;
365         else
366                 nop = NULL;
367         if (owp == NULL && nowp != NULL) {
368                 NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
369                 LIST_INIT(&nowp->nfsow_open);
370                 nowp->nfsow_clp = clp;
371                 nowp->nfsow_seqid = 0;
372                 nowp->nfsow_defunct = 0;
373                 nfscl_lockinit(&nowp->nfsow_rwlock);
374                 if (dp != NULL) {
375                         nfsstatsv1.cllocalopenowners++;
376                         LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list);
377                 } else {
378                         nfsstatsv1.clopenowners++;
379                         LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list);
380                 }
                    /* Ownership has moved to the list; tell the caller. */
381                 owp = *owpp = nowp;
382                 *nowpp = NULL;
383                 if (newonep != NULL)
384                         *newonep = 1;
385         }
386
387          /* If an fhp has been specified, create an Open as well. */
388         if (fhp != NULL) {
389                 /* and look for the correct open, based upon FH */
390                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
391                         if (op->nfso_fhlen == fhlen &&
392                             !NFSBCMP(op->nfso_fh, fhp, fhlen))
393                                 break;
394                 }
395                 if (op == NULL && nop != NULL) {
396                         nop->nfso_own = owp;
397                         nop->nfso_mode = 0;
398                         nop->nfso_opencnt = 0;
399                         nop->nfso_posixlock = 1;
400                         nop->nfso_fhlen = fhlen;
401                         NFSBCOPY(fhp, nop->nfso_fh, fhlen);
402                         LIST_INIT(&nop->nfso_lock);
                            /* Start out with the all zeros stateid. */
403                         nop->nfso_stateid.seqid = 0;
404                         nop->nfso_stateid.other[0] = 0;
405                         nop->nfso_stateid.other[1] = 0;
406                         nop->nfso_stateid.other[2] = 0;
407                         KASSERT(cred != NULL, ("%s: cred NULL\n", __func__));
408                         newnfs_copyincred(cred, &nop->nfso_cred);
409                         if (dp != NULL) {
                                    /*
                                     * Open on a delegation: move the
                                     * delegation to the head of the client's
                                     * delegation list and refresh its
                                     * timestamp.  The open is not put on
                                     * the client's open hash list.
                                     */
410                                 TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
411                                 TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
412                                     nfsdl_list);
413                                 dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
414                                 nfsstatsv1.cllocalopens++;
415                         } else {
416                                 LIST_INSERT_HEAD(NFSCLOPENHASH(clp, fhp, fhlen),
417                                     nop, nfso_hash);
418                                 nfsstatsv1.clopens++;
419                         }
420                         LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list);
421                         *opp = nop;
422                         *nopp = NULL;
423                         if (newonep != NULL)
424                                 *newonep = 1;
425                 } else {
                            /* Existing open, or NULL if nothing to link in. */
426                         *opp = op;
427                 }
428         }
429 }
430
431 /*
432  * Called to find/add a delegation to a client.
     *
     * On entry *dpp is either a newly acquired delegation to be added or
     * NULL to look up the existing delegation for the file handle.
     *
     * - A new read delegation (NFSCLDL_READ) on a mount that is not
     *   read-only is handed to nfscl_trydelegreturn() and freed; *dpp is
     *   set to NULL and 0 is returned.
     * - If no delegation exists for the file handle, the new one is linked
     *   into clp->nfsc_deleg and the delegation hash list and *dpp is set
     *   to NULL.  If none was supplied, NFSERR_BADSTATEID is returned.
     * - If a delegation already exists, a supplied new one is freed and
     *   *dpp set to NULL; otherwise *dpp is set to the existing one.
     *
     * Returns 0 except for the NFSERR_BADSTATEID case above.
433  */
434 int
435 nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
436     int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp)
437 {
438         struct nfscldeleg *dp = *dpp, *tdp;
439         struct nfsmount *nmp;
440
441         KASSERT(mp != NULL, ("nfscl_deleg: mp NULL"));
442         nmp = VFSTONFS(mp);
443         /*
444          * First, if we have received a Read delegation for a file on a
445          * read/write file system, just return it, because they aren't
446          * useful, imho.
447          */
448         if (dp != NULL && !NFSMNT_RDONLY(mp) &&
449             (dp->nfsdl_flags & NFSCLDL_READ)) {
450                 nfscl_trydelegreturn(dp, cred, nmp, p);
451                 free(dp, M_NFSCLDELEG);
452                 *dpp = NULL;
453                 return (0);
454         }
455
456         /*
457          * Since a delegation might be added to the mount,
458          * set NFSMNTP_DELEGISSUED now.  If a delegation already
459          * exists, setting this flag is harmless.
460          */
461         NFSLOCKMNT(nmp);
462         nmp->nm_privflag |= NFSMNTP_DELEGISSUED;
463         NFSUNLOCKMNT(nmp);
464
465         /* Look for the correct deleg, based upon FH */
466         NFSLOCKCLSTATE();
467         tdp = nfscl_finddeleg(clp, nfhp, fhlen);
468         if (tdp == NULL) {
469                 if (dp == NULL) {
470                         NFSUNLOCKCLSTATE();
471                         return (NFSERR_BADSTATEID);
472                 }
                    /* The lists take over the new delegation. */
473                 *dpp = NULL;
474                 TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
475                 LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp,
476                     nfsdl_hash);
477                 dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
478                 nfsstatsv1.cldelegates++;
479                 nfscl_delegcnt++;
480         } else {
481                 /*
482                  * Delegation already exists, what do we do if a new one??
                     * (As coded: the new one is discarded, the old one kept.)
483                  */
484                 if (dp != NULL) {
485                         printf("Deleg already exists!\n");
486                         free(dp, M_NFSCLDELEG);
487                         *dpp = NULL;
488                 } else {
489                         *dpp = tdp;
490                 }
491         }
492         NFSUNLOCKCLSTATE();
493         return (0);
494 }
495
496 /*
497  * Find a delegation for this file handle. Return NULL upon failure.
     *
     * Walks the delegation hash chain selected by NFSCLDELEGHASH() for
     * (clp, fhp, fhlen) and returns the first entry whose file handle has
     * the same length and the same bytes.  No locking is done here;
     * nfscl_deleg() calls this with NFSLOCKCLSTATE() held.
498  */
499 static struct nfscldeleg *
500 nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
501 {
502         struct nfscldeleg *dp;
503
504         LIST_FOREACH(dp, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) {
505             if (dp->nfsdl_fhlen == fhlen &&
506                 !NFSBCMP(dp->nfsdl_fh, fhp, fhlen))
507                 break;
508         }
            /* dp is NULL here if the loop ran off the end of the chain. */
509         return (dp);
510 }
511
512 /*
513  * Get a stateid for an I/O operation. First, look for an open and iff
514  * found, return either a lockowner stateid or the open stateid.
515  * If no Open is found, just return error and the special stateid of all zeros.
     *
     * The stateid stored in *stateidp is chosen in this order:
     * 1 - For "oneopenown" NFSv4.1/4.2 mounts with fords == 0, the open
     *     cached in the nfsnode (n_openstateid), if no delegation has been
     *     issued to the mount and NMIGHTBELOCKED is not set.
     * 2 - A delegation for the file handle (a write delegation if
     *     NFSV4OPEN_ACCESSWRITE is set in mode).
     * 3 - If p != NULL and fords == 0, the lockowner stateid found by
     *     nfscl_getopen().
     * 4 - The stateid of an open for the file handle.
     * When NFSHASNFSV4N(nmp) is true the seqid is always set to 0.
     *
     * fords  - non-zero for a DS: *stateidp is not preset to all zeros and
     *          a lock stateid is never returned.
     * cred   - when p == NULL and an open is picked by the fallback search,
     *          the open's credentials are copied into it.
     * lckpp  - *lckpp is set to NULL, except when a delegation stateid is
     *          returned and NDELEGRECALL is not set for the node; then the
     *          delegation's nfsdl_rwlock use count is incremented and
     *          *lckpp points at that lock.
     *          NOTE(review): the caller presumably has to drop that
     *          reference when the I/O is done - confirm against callers.
     *
     * Returns 0 on success, EISDIR if vp is not a regular file, EACCES if
     * there is no client structure for the mount and ENOENT if no usable
     * open exists.
516  */
517 int
518 nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
519     int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp,
520     void **lckpp)
521 {
522         struct nfsclclient *clp;
523         struct nfsclopen *op = NULL, *top;
524         struct nfsclopenhash *oph;
525         struct nfscllockowner *lp;
526         struct nfscldeleg *dp;
527         struct nfsnode *np;
528         struct nfsmount *nmp;
529         u_int8_t own[NFSV4CL_LOCKNAMELEN], lockown[NFSV4CL_LOCKNAMELEN];
530         int error;
531         bool done;
532
533         *lckpp = NULL;
534         /*
535          * Initially, just set the special stateid of all zeros.
536          * (Don't do this for a DS, since the special stateid can't be used.)
537          */
538         if (fords == 0) {
539                 stateidp->seqid = 0;
540                 stateidp->other[0] = 0;
541                 stateidp->other[1] = 0;
542                 stateidp->other[2] = 0;
543         }
544         if (vnode_vtype(vp) != VREG)
545                 return (EISDIR);
546         np = VTONFS(vp);
547         nmp = VFSTONFS(vp->v_mount);
548
549         /*
550          * For "oneopenown" mounts, first check for a cached open in the
551          * NFS vnode, that can be used as a stateid.  This can only be
552          * done if no delegations have been issued to the mount and no
553          * byte range file locking has been done for the file.
             * Note that this path takes only the mount and node locks, not
             * the client state lock.
554          */
555         if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp) && fords == 0) {
556                 NFSLOCKMNT(nmp);
557                 NFSLOCKNODE(np);
558                 if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0 &&
559                     (np->n_flag & NMIGHTBELOCKED) == 0 &&
560                     np->n_openstateid != NULL) {
561                         stateidp->seqid = 0;
562                         stateidp->other[0] =
563                             np->n_openstateid->nfso_stateid.other[0];
564                         stateidp->other[1] =
565                             np->n_openstateid->nfso_stateid.other[1];
566                         stateidp->other[2] =
567                             np->n_openstateid->nfso_stateid.other[2];
568                         NFSUNLOCKNODE(np);
569                         NFSUNLOCKMNT(nmp);
570                         return (0);
571                 }
572                 NFSUNLOCKNODE(np);
573                 NFSUNLOCKMNT(nmp);
574         }
575
576         NFSLOCKCLSTATE();
577         clp = nfscl_findcl(nmp);
578         if (clp == NULL) {
579                 NFSUNLOCKCLSTATE();
580                 return (EACCES);
581         }
582
583         /*
584          * Wait for recovery to complete.
585          */
586         while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG))
587                 (void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR,
588                     PZERO, "nfsrecvr", NULL);
589
590         /*
591          * First, look for a delegation.
592          */
593         LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
594                 if (dp->nfsdl_fhlen == fhlen &&
595                     !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
                            /*
                             * Usable unless write access is wanted and this
                             * is not a write delegation.
                             */
596                         if (!(mode & NFSV4OPEN_ACCESSWRITE) ||
597                             (dp->nfsdl_flags & NFSCLDL_WRITE)) {
598                                 if (NFSHASNFSV4N(nmp))
599                                         stateidp->seqid = 0;
600                                 else
601                                         stateidp->seqid =
602                                             dp->nfsdl_stateid.seqid;
603                                 stateidp->other[0] = dp->nfsdl_stateid.other[0];
604                                 stateidp->other[1] = dp->nfsdl_stateid.other[1];
605                                 stateidp->other[2] = dp->nfsdl_stateid.other[2];
                                    /*
                                     * Unless a recall is in progress for the
                                     * node, move the delegation to the head
                                     * of the list, refresh its timestamp and
                                     * hand a reference on its lock back via
                                     * lckpp.
                                     */
606                                 if (!(np->n_flag & NDELEGRECALL)) {
607                                         TAILQ_REMOVE(&clp->nfsc_deleg, dp,
608                                             nfsdl_list);
609                                         TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
610                                             nfsdl_list);
611                                         dp->nfsdl_timestamp = NFSD_MONOSEC +
612                                             120;
613                                         dp->nfsdl_rwlock.nfslock_usecnt++;
614                                         *lckpp = (void *)&dp->nfsdl_rwlock;
615                                 }
616                                 NFSUNLOCKCLSTATE();
617                                 return (0);
618                         }
                            /* Matching file handle but not usable; stop. */
619                         break;
620                 }
621         }
622
623         if (p != NULL) {
624                 /*
625                  * If p != NULL, we want to search the parentage tree
626                  * for a matching OpenOwner and use that.
627                  */
628                 if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
629                         nfscl_filllockowner(NULL, own, F_POSIX);
630                 else
631                         nfscl_filllockowner(p->td_proc, own, F_POSIX);
                    /* The lockowner name is always derived from the process. */
632                 nfscl_filllockowner(p->td_proc, lockown, F_POSIX);
633                 lp = NULL;
634                 error = nfscl_getopen(NULL, clp->nfsc_openhash, nfhp, fhlen,
635                     own, lockown, mode, &lp, &op);
636                 if (error == 0 && lp != NULL && fords == 0) {
637                         /* Don't return a lock stateid for a DS. */
638                         if (NFSHASNFSV4N(nmp))
639                                 stateidp->seqid = 0;
640                         else
641                                 stateidp->seqid = lp->nfsl_stateid.seqid;
642                         stateidp->other[0] =
643                             lp->nfsl_stateid.other[0];
644                         stateidp->other[1] =
645                             lp->nfsl_stateid.other[1];
646                         stateidp->other[2] =
647                             lp->nfsl_stateid.other[2];
648                         NFSUNLOCKCLSTATE();
649                         return (0);
650                 }
651         }
652         if (op == NULL) {
653                 /* If not found, just look for any OpenOwner that will work. */
                    /*
                     * "top" remembers the first open for this file handle
                     * that has write access when read access is wanted; it
                     * is the fallback if no open covers every bit in mode.
                     */
654                 top = NULL;
655                 done = false;
656                 oph = NFSCLOPENHASH(clp, nfhp, fhlen);
657                 LIST_FOREACH(op, oph, nfso_hash) {
658                         if (op->nfso_fhlen == fhlen &&
659                             !NFSBCMP(op->nfso_fh, nfhp, fhlen)) {
660                                 if (top == NULL && (op->nfso_mode &
661                                     NFSV4OPEN_ACCESSWRITE) != 0 &&
662                                     (mode & NFSV4OPEN_ACCESSREAD) != 0)
663                                         top = op;
664                                 if ((mode & op->nfso_mode) == mode) {
665                                         /* LRU order the hash list. */
666                                         LIST_REMOVE(op, nfso_hash);
667                                         LIST_INSERT_HEAD(oph, op, nfso_hash);
668                                         done = true;
669                                         break;
670                                 }
671                         }
672                 }
673                 if (!done) {
674                         NFSCL_DEBUG(2, "openmode top=%p\n", top);
                            /*
                             * The fallback open is not used when
                             * NFSHASOPENMODE(nmp) is true.
                             */
675                         if (top == NULL || NFSHASOPENMODE(nmp)) {
676                                 NFSUNLOCKCLSTATE();
677                                 return (ENOENT);
678                         } else
679                                 op = top;
680                 }
681                 /*
682                  * For read aheads or write behinds, use the open cred.
683                  * A read ahead or write behind is indicated by p == NULL.
684                  */
685                 if (p == NULL)
686                         newnfs_copycred(&op->nfso_cred, cred);
687         }
688
689         /*
690          * No lock stateid, so return the open stateid.
691          */
692         if (NFSHASNFSV4N(nmp))
693                 stateidp->seqid = 0;
694         else
695                 stateidp->seqid = op->nfso_stateid.seqid;
696         stateidp->other[0] = op->nfso_stateid.other[0];
697         stateidp->other[1] = op->nfso_stateid.other[1];
698         stateidp->other[2] = op->nfso_stateid.other[2];
699         NFSUNLOCKCLSTATE();
700         return (0);
701 }
702
703 /*
704  * Search for a matching file, mode and, optionally, lockowner.
     *
     * The opens that are searched come from one of two places.  When ohashp
     * is NULL, the open owners on the list ohp are walked (used for the local
     * opens on a delegation).  Otherwise the bucket of the hash table ohashp
     * selected by NFSCLOPENHASHFUNC(nfhp, fhlen) is walked.  The KASSERT
     * below rejects a call that sets both ohp and ohashp.
     *
     * An open is a candidate when its file handle equals nfhp/fhlen and its
     * nfso_mode contains every bit set in mode, so a mode of 0 accepts any
     * open of the file.  Each candidate is handed to nfscl_checkown() along
     * with the openown and lockown names to compare against.
     *
     * If lpp is not NULL, *lpp is cleared on entry and is set when a
     * lockowner matching lockown is found.
     *
     * Returns 0 with *opp set to the chosen open, or EBADF when there is no
     * candidate open at all, in which case *opp is not written.
705  */
706 static int
707 nfscl_getopen(struct nfsclownerhead *ohp, struct nfsclopenhash *ohashp,
708     u_int8_t *nfhp, int fhlen, u_int8_t *openown, u_int8_t *lockown,
709     u_int32_t mode, struct nfscllockowner **lpp, struct nfsclopen **opp)
710 {
711         struct nfsclowner *owp;
712         struct nfsclopen *op, *rop, *rop2;
713         struct nfsclopenhash *oph;
714         bool keep_looping;
715
716         KASSERT(ohp == NULL || ohashp == NULL, ("nfscl_getopen: "
717             "only one of ohp and ohashp can be set"));
718         if (lpp != NULL)
719                 *lpp = NULL;
720         /*
721          * rop will be set to the open to be returned. There are three
722          * variants of this, all for an open of the correct file:
723          * 1 - A match of lockown.
724          * 2 - A match of the openown, when no lockown match exists.
725          * 3 - A match for any open, if no openown or lockown match exists.
726          * Looking for #2 over #3 probably isn't necessary, but since
727          * RFC3530 is vague w.r.t. the relationship between openowners and
728          * lockowners, I think this is the safer way to go.
729          */
730         rop = NULL;
731         rop2 = NULL;
732         keep_looping = true;
733         /* Search the client list */
734         if (ohashp == NULL) {
735                 /* Search the local opens on the delegation. */
736                 LIST_FOREACH(owp, ohp, nfsow_list) {
737                         /* and look for the correct open */
738                         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
739                                 if (op->nfso_fhlen == fhlen &&
740                                     !NFSBCMP(op->nfso_fh, nfhp, fhlen)
741                                     && (op->nfso_mode & mode) == mode)
742                                         keep_looping = nfscl_checkown(owp, op, openown,
743                                             lockown, lpp, &rop, &rop2);
                                    /*
                                     * Only a candidate open updates
                                     * keep_looping; false ends both loops.
                                     */
744                                 if (!keep_looping)
745                                         break;
746                         }
747                         if (!keep_looping)
748                                 break;
749                 }
750         } else {
751                 /* Search for matching opens on the hash list. */
752                 oph = &ohashp[NFSCLOPENHASHFUNC(nfhp, fhlen)];
753                 LIST_FOREACH(op, oph, nfso_hash) {
754                         if (op->nfso_fhlen == fhlen &&
755                             !NFSBCMP(op->nfso_fh, nfhp, fhlen)
756                             && (op->nfso_mode & mode) == mode)
757                                 keep_looping = nfscl_checkown(op->nfso_own, op,
758                                     openown, lockown, lpp, &rop, &rop2);
759                         if (!keep_looping) {
760                                 /* LRU order the hash list. */
761                                 LIST_REMOVE(op, nfso_hash);
762                                 LIST_INSERT_HEAD(oph, op, nfso_hash);
763                                 break;
764                         }
765                 }
766         }
            /* No lockowner or openowner match: use the first candidate open. */
767         if (rop == NULL)
768                 rop = rop2;
769         if (rop == NULL)
770                 return (EBADF);
771         *opp = rop;
772         return (0);
773 }
774
775 /*
     * Check for an owner match.
     *
     * Helper for nfscl_getopen(), called for an open (op, owned by owp) that
     * has already matched on file handle and mode.
     * - If lpp is not NULL and op has a lockowner whose name equals lockown,
     *   *lpp and *ropp are set and false is returned.
     * - Otherwise, if *ropp is still NULL and the name of owp equals openown,
     *   op is stored in *ropp.  false is then returned only when lpp is NULL;
     *   with lpp set the caller keeps looping, so a lockowner match on a
     *   later open can still replace *ropp.
     * - op is stored in *ropp2 if *ropp2 is still NULL.
     * Returns true if the caller should continue searching and false if the
     * search is finished.
     */
776 static bool
777 nfscl_checkown(struct nfsclowner *owp, struct nfsclopen *op, uint8_t *openown,
778     uint8_t *lockown, struct nfscllockowner **lpp, struct nfsclopen **ropp,
779     struct nfsclopen **ropp2)
780 {
781         struct nfscllockowner *lp;
782         bool keep_looping;
783
784         keep_looping = true;
785         if (lpp != NULL) {
786                 /* Now look for a matching lockowner. */
787                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
788                         if (!NFSBCMP(lp->nfsl_owner, lockown,
789                             NFSV4CL_LOCKNAMELEN)) {
                                /* Best possible match; stop the search. */
790                                 *lpp = lp;
791                                 *ropp = op;
792                                 return (false);
793                         }
794                 }
795         }
            /* Record the first open whose openowner name matches. */
796         if (*ropp == NULL && !NFSBCMP(owp->nfsow_owner, openown,
797             NFSV4CL_LOCKNAMELEN)) {
798                 *ropp = op;
799                 if (lpp == NULL)
800                         keep_looping = false;
801         }
            /* Remember the first open seen as the last-resort choice. */
802         if (*ropp2 == NULL)
803                 *ropp2 = op;
804         return (keep_looping);
805 }
806
807 /*
808  * Release use of an open owner. Called when open operations are done
809  * with the open owner.
     *
     * A NULL owp is ignored.  With the state mutex held (NFSLOCKCLSTATE), the
     * owner's nfsow_rwlock is released unless the caller passes a non-zero
     * "unlocked", and then the owner's client (nfsow_clp) is released via
     * nfscl_clrelease().  The error and candelete arguments are unused.
810  */
811 void
812 nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp,
813     __unused int error, __unused int candelete, int unlocked)
814 {
815
816         if (owp == NULL)
817                 return;
818         NFSLOCKCLSTATE();
819         if (unlocked == 0) {
                    /*
                     * NOTE(review): the release call depends on
                     * NFSHASONEOPENOWN(nmp); presumably this mirrors how the
                     * owner was acquired (reference vs. lock) - confirm.
                     */
820                 if (NFSHASONEOPENOWN(nmp))
821                         nfsv4_relref(&owp->nfsow_rwlock);
822                 else
823                         nfscl_lockunlock(&owp->nfsow_rwlock);
824         }
825         nfscl_clrelease(owp->nfsow_clp);
826         NFSUNLOCKCLSTATE();
827 }
828
829 /*
830  * Release use of an open structure under an open owner.
     *
     * A NULL op is ignored.  With the state mutex held, the nfsow_rwlock of
     * the open's owner is released (nfsv4_relref() when NFSHASONEOPENOWN(nmp),
     * else nfscl_lockunlock()).  If both error and candelete are non-zero and
     * the open's nfso_opencnt is 0, the open is freed with
     * nfscl_freeopen(op, 0).  Finally the owner's client is released via
     * nfscl_clrelease().
831  */
832 void
833 nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
834     int candelete)
835 {
836         struct nfsclclient *clp;
837         struct nfsclowner *owp;
838
839         if (op == NULL)
840                 return;
841         NFSLOCKCLSTATE();
842         owp = op->nfso_own;
843         if (NFSHASONEOPENOWN(nmp))
844                 nfsv4_relref(&owp->nfsow_rwlock);
845         else
846                 nfscl_lockunlock(&owp->nfsow_rwlock);
            /* Read the client pointer before the open is possibly freed. */
847         clp = owp->nfsow_clp;
848         if (error && candelete && op->nfso_opencnt == 0)
849                 nfscl_freeopen(op, 0);
850         nfscl_clrelease(clp);
851         NFSUNLOCKCLSTATE();
852 }
853
854 /*
855  * Called to get a clientid structure. It will optionally lock the
856  * client data structures to do the SetClientId/SetClientId_confirm,
857  * but will release that lock and return the clientid with a reference
858  * count on it.
859  * If the "cred" argument is NULL, a new clientid should not be created.
860  * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot
861  * be done.
862  * It always returns *clpp with a reference count on it, unless returning
     * an error.
     *
     * If "tryminvers" is true, a NFSERR_MINORVERMISMATCH reply causes the
     * mount's nm_minorvers to be decremented (while it is above 0) and the
     * setclientid to be retried.
     *
     * Returns 0 on success.  EBADF is returned when a forced dismount is in
     * progress.  EACCES is returned when the mount has no client structure
     * and cred is NULL, or when a clientid must be acquired but p or cred is
     * NULL.  Otherwise the error from nfsrpc_setclient() is returned.
863  */
864 int
865 nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
866     bool tryminvers, struct nfsclclient **clpp)
867 {
868         struct nfsclclient *clp;
869         struct nfsclclient *newclp = NULL;
870         struct nfsmount *nmp;
871         char uuid[HOSTUUIDLEN];
872         int igotlock = 0, error, trystalecnt, clidinusedelay, i;
873         u_int16_t idlen = 0;
874
875         nmp = VFSTONFS(mp);
            /*
             * Pre-allocate a client structure before taking the state mutex,
             * in case the mount does not have one yet.  The id length is the
             * host uuid length (or 16 when the uuid is empty) plus
             * sizeof (u_int64_t).
             * NOTE(review): the "- 1" in the allocation size presumably
             * accounts for a one byte id array at the end of struct
             * nfsclclient - confirm against the structure declaration.
             */
876         if (cred != NULL) {
877                 getcredhostuuid(cred, uuid, sizeof uuid);
878                 idlen = strlen(uuid);
879                 if (idlen > 0)
880                         idlen += sizeof (u_int64_t);
881                 else
882                         idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */
883                 newclp = malloc(
884                     sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT,
885                     M_WAITOK | M_ZERO);
886         }
887         NFSLOCKCLSTATE();
888         /*
889          * If a forced dismount is already in progress, don't
890          * allocate a new clientid and get out now. For the case where
891          * clp != NULL, this is a harmless optimization.
892          */
893         if (NFSCL_FORCEDISM(mp)) {
894                 NFSUNLOCKCLSTATE();
895                 if (newclp != NULL)
896                         free(newclp, M_NFSCLCLIENT);
897                 return (EBADF);
898         }
899         clp = nmp->nm_clp;
900         if (clp == NULL) {
                    /* No client yet and none was allocated (cred == NULL). */
901                 if (newclp == NULL) {
902                         NFSUNLOCKCLSTATE();
903                         return (EACCES);
904                 }
                    /* Initialize the new client and attach it to the mount. */
905                 clp = newclp;
906                 clp->nfsc_idlen = idlen;
907                 LIST_INIT(&clp->nfsc_owner);
908                 TAILQ_INIT(&clp->nfsc_deleg);
909                 TAILQ_INIT(&clp->nfsc_layout);
910                 LIST_INIT(&clp->nfsc_devinfo);
911                 for (i = 0; i < NFSCLDELEGHASHSIZE; i++)
912                         LIST_INIT(&clp->nfsc_deleghash[i]);
913                 for (i = 0; i < NFSCLOPENHASHSIZE; i++)
914                         LIST_INIT(&clp->nfsc_openhash[i]);
915                 for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
916                         LIST_INIT(&clp->nfsc_layouthash[i]);
917                 clp->nfsc_flags = NFSCLFLAGS_INITED;
918                 clp->nfsc_clientidrev = 1;
919                 clp->nfsc_cbident = nfscl_nextcbident();
920                 nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id,
921                     clp->nfsc_idlen);
922                 LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list);
923                 nmp->nm_clp = clp;
924                 clp->nfsc_nmp = nmp;
925         } else {
                    /* The mount already has a client; drop the spare one. */
926                 if (newclp != NULL)
927                         free(newclp, M_NFSCLCLIENT);
928         }
            /*
             * While the client has no clientid, keep trying for the exclusive
             * lock (needed for the setclientid below) unless a forced
             * dismount starts.
             */
929         while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock &&
930             !NFSCL_FORCEDISM(mp))
931                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
932                     NFSCLSTATEMUTEXPTR, mp);
933         if (igotlock == 0) {
934                 /*
935                  * Call nfsv4_lock() with "iwantlock == 0" so that it will
936                  * wait for a pending exclusive lock request.  This gives the
937                  * exclusive lock request priority over this shared lock
938                  * request.
939                  * An exclusive lock on nfsc_lock is used mainly for server
940                  * crash recoveries.
941                  */
942                 nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR, mp);
943                 nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
944         }
945         if (igotlock == 0 && NFSCL_FORCEDISM(mp)) {
946                 /*
947                  * Both nfsv4_lock() and nfsv4_getref() know to check
948                  * for NFSCL_FORCEDISM() and return without sleeping to
949                  * wait for the exclusive lock to be released, since it
950                  * might be held by nfscl_umount() and we need to get out
951                  * now for that case and not wait until nfscl_umount()
952                  * releases it.
953                  */
954                 NFSUNLOCKCLSTATE();
955                 return (EBADF);
956         }
957         NFSUNLOCKCLSTATE();
958
959         /*
960          * If it needs a clientid, do the setclientid now.
961          */
962         if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) {
                    /* Getting here without the exclusive lock is unexpected. */
963                 if (!igotlock)
964                         panic("nfscl_clget");
                    /* Cannot do the setclientid without both p and cred. */
965                 if (p == NULL || cred == NULL) {
966                         NFSLOCKCLSTATE();
967                         nfsv4_unlock(&clp->nfsc_lock, 0);
968                         NFSUNLOCKCLSTATE();
969                         return (EACCES);
970                 }
971                 /*
972                  * If RFC3530 Sec. 14.2.33 is taken literally,
973                  * NFSERR_CLIDINUSE will be returned persistently for the
974                  * case where a new mount of the same file system is using
975                  * a different principal. In practice, NFSERR_CLIDINUSE is
976                  * only returned when there is outstanding unexpired state
977                  * on the clientid. As such, try for twice the lease
978                  * interval, if we know what that is. Otherwise, make a
979                  * wild ass guess.
980                  * The case of returning NFSERR_STALECLIENTID is far less
981                  * likely, but might occur if there is a significant delay
982                  * between doing the SetClientID and SetClientIDConfirm Ops,
983                  * such that the server throws away the clientid before
984                  * receiving the SetClientIDConfirm.
985                  */
986                 if (clp->nfsc_renew > 0)
987                         clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2;
988                 else
989                         clidinusedelay = 120;
990                 trystalecnt = 3;
991                 do {
992                         error = nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
993                         if (error == NFSERR_STALECLIENTID ||
994                             error == NFSERR_STALEDONTRECOVER ||
995                             error == NFSERR_BADSESSION ||
996                             error == NFSERR_CLIDINUSE) {
                                    /* Nap before a possible retry. */
997                                 (void) nfs_catnap(PZERO, error, "nfs_setcl");
998                         } else if (error == NFSERR_MINORVERMISMATCH &&
999                             tryminvers) {
                                 /*
                                  * Step down one minor version; at 0 there is
                                  * nothing left to try, so stop retrying.
                                  */
1000                                 if (nmp->nm_minorvers > 0)
1001                                         nmp->nm_minorvers--;
1002                                 else
1003                                         tryminvers = false;
1004                         }
                     /*
                      * Retry the stale/bad session errors for at most
                      * trystalecnt (3) attempts, NFSERR_CLIDINUSE for at most
                      * clidinusedelay attempts and a minor version mismatch
                      * for as long as tryminvers remains true.
                      */
1005                 } while (((error == NFSERR_STALECLIENTID ||
1006                      error == NFSERR_BADSESSION ||
1007                      error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) ||
1008                     (error == NFSERR_CLIDINUSE && --clidinusedelay > 0) ||
1009                     (error == NFSERR_MINORVERMISMATCH && tryminvers));
1010                 if (error) {
1011                         NFSLOCKCLSTATE();
1012                         nfsv4_unlock(&clp->nfsc_lock, 0);
1013                         NFSUNLOCKCLSTATE();
1014                         return (error);
1015                 }
1016                 clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
1017         }
             /*
              * NOTE(review): the success path unlocks with a second argument
              * of 1 where the error paths above pass 0; presumably 1 leaves a
              * reference held for the caller - confirm in nfsv4_unlock().
              */
1018         if (igotlock) {
1019                 NFSLOCKCLSTATE();
1020                 nfsv4_unlock(&clp->nfsc_lock, 1);
1021                 NFSUNLOCKCLSTATE();
1022         }
1023
1024         *clpp = clp;
1025         return (0);
1026 }
1027
1028 /*
1029  * Return the clientid structure of the mount, if valid.
      *
      * The mount's nm_clp is returned when it is not NULL and has
      * NFSCLFLAGS_HASCLIENTID set; otherwise NULL is returned.
      * NOTE(review): this comment used to read "Get a reference to a
      * clientid", but no lock or reference count is acquired in this
      * function and the state mutex is not taken here - callers presumably
      * provide whatever protection is needed; confirm.
1030  */
1031 struct nfsclclient *
1032 nfscl_findcl(struct nfsmount *nmp)
1033 {
1034         struct nfsclclient *clp;
1035
1036         clp = nmp->nm_clp;
1037         if (clp == NULL || !(clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID))
1038                 return (NULL);
1039         return (clp);
1040 }
1041
1042 /*
1043  * Release the clientid structure. It may be locked or reference counted.
      *
      * If the NFSV4LOCK_LOCK bit is set in the client's nfsc_lock, the lock is
      * dropped with nfsv4_unlock(); otherwise a reference is dropped with
      * nfsv4_relref().  The state mutex is not taken here; the callers in
      * this file invoke it between NFSLOCKCLSTATE() and NFSUNLOCKCLSTATE().
1044  */
1045 static void
1046 nfscl_clrelease(struct nfsclclient *clp)
1047 {
1048
1049         if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
1050                 nfsv4_unlock(&clp->nfsc_lock, 0);
1051         else
1052                 nfsv4_relref(&clp->nfsc_lock);
1053 }
1054
1055 /*
1056  * External call for nfscl_clrelease.
      *
      * Performs the same unlock-or-drop-reference step as nfscl_clrelease(),
      * but takes and releases the state mutex itself, so it is meant for
      * callers that do not already hold it.
1057  */
1058 void
1059 nfscl_clientrelease(struct nfsclclient *clp)
1060 {
1061
1062         NFSLOCKCLSTATE();
             /* Same test and calls as the body of nfscl_clrelease(). */
1063         if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
1064                 nfsv4_unlock(&clp->nfsc_lock, 0);
1065         else
1066                 nfsv4_relref(&clp->nfsc_lock);
1067         NFSUNLOCKCLSTATE();
1068 }
1069
1070 /*
1071  * Called when wanting to lock a byte region.
      *
      * vp - vnode of the file being locked.
      * off, len - start and length of the range.  A len of NFS64BITSSET sets
      *     the end of the range to NFS64BITSSET; otherwise off + len must be
      *     greater than off or NFSERR_INVAL is returned.
      * type - lock type stored in the new lock; also compared with F_WRLCK
      *     when checking whether a delegation can be used.
      * cred, p - passed to nfscl_getcl() when not recovering.  p->td_proc is
      *     also used to build the openowner name, unless the mount has
      *     NFSHASONEOPENOWN set.
      * rclp, rownp, ropenownp - the client, lockowner name and openowner name
      *     to use when recovery is non-zero; not used otherwise.
      * recovery - non-zero skips nfscl_getcl(), the delegation search, the
      *     local conflict check and every NFSLOCKCLSTATE()/NFSUNLOCKCLSTATE()
      *     call made here.
      *     NOTE(review): presumably the recovery caller already provides the
      *     needed locking - confirm.
      * id, flags - passed to nfscl_filllockowner() to build the lockowner
      *     name when not recovering; flags is also stored in a newly created
      *     lockowner.
      * lpp - set to NULL on entry and to the lockowner used on success.
      * newonep - set to 1 if a new lockowner was created here, else 0.
      * donelocallyp - set to 1 if the lock was handled locally, which is the
      *     case when a matching open was found for a usable delegation or
      *     when nfscl_updatelock() returned 0; else 0.
      *
      * Returns 0 on success or an error number.  On success when not
      * recovering: if the lock was handled locally the client is released
      * before returning; otherwise the client is kept and the lockowner's
      * nfsl_rwlock is held exclusively on return.
1072  */
1073 int
1074 nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
1075     short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp,
1076     int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp,
1077     struct nfscllockowner **lpp, int *newonep, int *donelocallyp)
1078 {
1079         struct nfscllockowner *lp;
1080         struct nfsclopen *op;
1081         struct nfsclclient *clp;
1082         struct nfscllockowner *nlp;
1083         struct nfscllock *nlop, *otherlop;
1084         struct nfscldeleg *dp = NULL, *ldp = NULL;
1085         struct nfscllockownerhead *lhp = NULL;
1086         struct nfsnode *np;
1087         u_int8_t own[NFSV4CL_LOCKNAMELEN], *ownp, openown[NFSV4CL_LOCKNAMELEN];
1088         u_int8_t *openownp;
1089         int error = 0, ret, donelocally = 0;
1090         u_int32_t mode;
1091
1092         /* For Lock Ops, the open mode doesn't matter, so use 0 to match any. */
1093         mode = 0;
1094         np = VTONFS(vp);
1095         *lpp = NULL;
1096         lp = NULL;
1097         *newonep = 0;
1098         *donelocallyp = 0;
1099
1100         /*
1101          * Might need these, so MALLOC them now, to
1102          * avoid a tsleep() in MALLOC later.
1103          */
1104         nlp = malloc(
1105             sizeof (struct nfscllockowner), M_NFSCLLOCKOWNER, M_WAITOK);
1106         otherlop = malloc(
1107             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1108         nlop = malloc(
1109             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1110         nlop->nfslo_type = type;
1111         nlop->nfslo_first = off;
1112         if (len == NFS64BITSSET) {
1113                 nlop->nfslo_end = NFS64BITSSET;
1114         } else {
                     /* Reject an empty range or one that wraps around. */
1115                 nlop->nfslo_end = off + len;
1116                 if (nlop->nfslo_end <= nlop->nfslo_first)
1117                         error = NFSERR_INVAL;
1118         }
1119
             /* Recovery uses the caller's client; otherwise get one now. */
1120         if (!error) {
1121                 if (recovery)
1122                         clp = rclp;
1123                 else
1124                         error = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
1125         }
             /* Bad range or no client: nothing is held yet, just clean up. */
1126         if (error) {
1127                 free(nlp, M_NFSCLLOCKOWNER);
1128                 free(otherlop, M_NFSCLLOCK);
1129                 free(nlop, M_NFSCLLOCK);
1130                 return (error);
1131         }
1132
1133         op = NULL;
             /* Pick the lockowner and openowner names to search for. */
1134         if (recovery) {
1135                 ownp = rownp;
1136                 openownp = ropenownp;
1137         } else {
1138                 nfscl_filllockowner(id, own, flags);
1139                 ownp = own;
1140                 if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
1141                         nfscl_filllockowner(NULL, openown, F_POSIX);
1142                 else
1143                         nfscl_filllockowner(p->td_proc, openown, F_POSIX);
1144                 openownp = openown;
1145         }
1146         if (!recovery) {
1147                 NFSLOCKCLSTATE();
1148                 /*
1149                  * First, search for a delegation. If one exists for this file,
1150                  * the lock can be done locally against it, so long as there
1151                  * isn't a local lock conflict.
1152                  */
1153                 ldp = dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1154                     np->n_fhp->nfh_len);
1155                 /*
                      * Just sanity check for correct type of delegation.
                      * Only dp is cleared when the delegation is not usable;
                      * ldp keeps referring to it and is what is handed to
                      * nfscl_localconflict() below.
                      */
1156                 if (dp != NULL && ((dp->nfsdl_flags &
1157                     (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) != 0 ||
1158                      (type == F_WRLCK &&
1159                       (dp->nfsdl_flags & NFSCLDL_WRITE) == 0)))
1160                         dp = NULL;
1161         }
1162         if (dp != NULL) {
1163                 /* Now, find an open and maybe a lockowner. */
1164                 ret = nfscl_getopen(&dp->nfsdl_owner, NULL, np->n_fhp->nfh_fh,
1165                     np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op);
                     /* Not among the delegation's opens: try the open hash. */
1166                 if (ret)
1167                         ret = nfscl_getopen(NULL, clp->nfsc_openhash,
1168                             np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1169                             ownp, mode, NULL, &op);
1170                 if (!ret) {
                             /*
                              * Lock against the delegation: use its lockowner
                              * list, move it to the head of the client's
                              * delegation list and set its timestamp to
                              * NFSD_MONOSEC + 120.
                              */
1171                         lhp = &dp->nfsdl_lock;
1172                         TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
1173                         TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
1174                         dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
1175                         donelocally = 1;
1176                 } else {
1177                         dp = NULL;
1178                 }
1179         }
1180         if (!donelocally) {
1181                 /*
1182                  * Get the related Open and maybe lockowner.
1183                  */
1184                 error = nfscl_getopen(NULL, clp->nfsc_openhash,
1185                     np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1186                     ownp, mode, &lp, &op);
1187                 if (!error)
1188                         lhp = &op->nfso_lock;
1189         }
1190         if (!error && !recovery)
1191                 error = nfscl_localconflict(clp, np->n_fhp->nfh_fh,
1192                     np->n_fhp->nfh_len, nlop, ownp, ldp, NULL);
             /*
              * On failure, drop the client and the state mutex taken above
              * (neither was acquired here when recovering) and free the
              * pre-allocated structures.
              */
1193         if (error) {
1194                 if (!recovery) {
1195                         nfscl_clrelease(clp);
1196                         NFSUNLOCKCLSTATE();
1197                 }
1198                 free(nlp, M_NFSCLLOCKOWNER);
1199                 free(otherlop, M_NFSCLLOCK);
1200                 free(nlop, M_NFSCLLOCK);
1201                 return (error);
1202         }
1203
1204         /*
1205          * Ok, see if a lockowner exists and create one, as required.
1206          */
             /* nfscl_getopen() supplied no lockowner: scan the chosen list. */
1207         if (lp == NULL)
1208                 LIST_FOREACH(lp, lhp, nfsl_list) {
1209                         if (!NFSBCMP(lp->nfsl_owner, ownp, NFSV4CL_LOCKNAMELEN))
1210                                 break;
1211                 }
             /* Still none: initialize the pre-allocated nlp and link it in. */
1212         if (lp == NULL) {
1213                 NFSBCOPY(ownp, nlp->nfsl_owner, NFSV4CL_LOCKNAMELEN);
1214                 if (recovery)
1215                         NFSBCOPY(ropenownp, nlp->nfsl_openowner,
1216                             NFSV4CL_LOCKNAMELEN);
1217                 else
1218                         NFSBCOPY(op->nfso_own->nfsow_owner, nlp->nfsl_openowner,
1219                             NFSV4CL_LOCKNAMELEN);
1220                 nlp->nfsl_seqid = 0;
1221                 nlp->nfsl_lockflags = flags;
1222                 nlp->nfsl_inprog = NULL;
1223                 nfscl_lockinit(&nlp->nfsl_rwlock);
1224                 LIST_INIT(&nlp->nfsl_lock);
1225                 if (donelocally) {
1226                         nlp->nfsl_open = NULL;
1227                         nfsstatsv1.cllocallockowners++;
1228                 } else {
1229                         nlp->nfsl_open = op;
1230                         nfsstatsv1.cllockowners++;
1231                 }
1232                 LIST_INSERT_HEAD(lhp, nlp, nfsl_list);
                     /* nlp now belongs to the list; keep it from being freed. */
1233                 lp = nlp;
1234                 nlp = NULL;
1235                 *newonep = 1;
1236         }
1237
1238         /*
1239          * Now, update the byte ranges for locks.
              * A zero return from nfscl_updatelock() is treated as "done
              * locally".
              * NOTE(review): nlop and otherlop are passed by address;
              * presumably nfscl_updatelock() NULLs the ones it keeps, which
              * would be why they are tested before being freed below -
              * confirm.
1240          */
1241         ret = nfscl_updatelock(lp, &nlop, &otherlop, donelocally);
1242         if (!ret)
1243                 donelocally = 1;
1244         if (donelocally) {
1245                 *donelocallyp = 1;
1246                 if (!recovery)
1247                         nfscl_clrelease(clp);
1248         } else {
1249                 /*
1250                  * Serial modifications on the lock owner for multiple threads
1251                  * for the same process using a read/write lock.
1252                  */
1253                 if (!recovery)
1254                         nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1255         }
1256         if (!recovery)
1257                 NFSUNLOCKCLSTATE();
1258
             /* Free whichever pre-allocated structures are still set. */
1259         if (nlp)
1260                 free(nlp, M_NFSCLLOCKOWNER);
1261         if (nlop)
1262                 free(nlop, M_NFSCLLOCK);
1263         if (otherlop)
1264                 free(otherlop, M_NFSCLLOCK);
1265
1266         *lpp = lp;
1267         return (0);
1268 }
1269
1270 /*
1271  * Called to unlock a byte range, for LockU.
      *
      * vp - vnode of the file being unlocked.
      * off, len - start and length of the range.  A len of NFS64BITSSET sets
      *     the end of the range to NFS64BITSSET; otherwise off + len must be
      *     greater than off or NFSERR_INVAL is returned.
      * cred - unused.
      * p - stored in the nfsl_inprog field of the lockowner that is found.
      * callcnt - when 0, the range is also unlocked on the matching lockowner
      *     of the file's delegation, if there is one.
      *     NOTE(review): presumably the caller calls this repeatedly with an
      *     increasing callcnt until *lpp comes back NULL - confirm.
      * clp - client whose delegations and opens are searched; it is not
      *     released here.
      * id, flags - passed to nfscl_filllockowner() to build the lockowner
      *     name.
      * lpp - set to NULL, or to a lockowner for this file with a matching
      *     name whose nfsl_inprog was NULL.  That lockowner is marked in
      *     progress for p and its nfsl_rwlock is held exclusively on return.
      * dorpcp - set to 1 when nfscl_updatelock() returned non-zero for that
      *     lockowner, else 0.
      *
      * Returns 0, or NFSERR_INVAL for a bad range.
1272  */
1273 int
1274 nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
1275     __unused struct ucred *cred, NFSPROC_T *p, int callcnt,
1276     struct nfsclclient *clp, void *id, int flags,
1277     struct nfscllockowner **lpp, int *dorpcp)
1278 {
1279         struct nfscllockowner *lp;
1280         struct nfsclopen *op;
1281         struct nfscllock *nlop, *other_lop = NULL;
1282         struct nfscldeleg *dp;
1283         struct nfsnode *np;
1284         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1285         int ret = 0, fnd;
1286
1287         np = VTONFS(vp);
1288         *lpp = NULL;
1289         *dorpcp = 0;
1290
1291         /*
1292          * Might need these, so MALLOC them now, to
1293          * avoid a tsleep() in MALLOC later.
1294          */
1295         nlop = malloc(
1296             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1297         nlop->nfslo_type = F_UNLCK;
1298         nlop->nfslo_first = off;
1299         if (len == NFS64BITSSET) {
1300                 nlop->nfslo_end = NFS64BITSSET;
1301         } else {
                     /* Reject an empty range or one that wraps around. */
1302                 nlop->nfslo_end = off + len;
1303                 if (nlop->nfslo_end <= nlop->nfslo_first) {
1304                         free(nlop, M_NFSCLLOCK);
1305                         return (NFSERR_INVAL);
1306                 }
1307         }
             /* A second copy of the range is needed for the delegation. */
1308         if (callcnt == 0) {
1309                 other_lop = malloc(
1310                     sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1311                 *other_lop = *nlop;
1312         }
1313         nfscl_filllockowner(id, own, flags);
1314         dp = NULL;
1315         NFSLOCKCLSTATE();
1316         if (callcnt == 0)
1317                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1318                     np->n_fhp->nfh_len);
1319
1320         /*
1321          * First, unlock any local regions on a delegation.
1322          */
1323         if (dp != NULL) {
1324                 /* Look for this lockowner. */
1325                 LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1326                         if (!NFSBCMP(lp->nfsl_owner, own,
1327                             NFSV4CL_LOCKNAMELEN))
1328                                 break;
1329                 }
1330                 if (lp != NULL)
1331                         /* Use other_lop, so nlop is still available */
1332                         (void)nfscl_updatelock(lp, &other_lop, NULL, 1);
1333         }
1334
1335         /*
1336          * Now, find a matching open/lockowner that hasn't already been done,
1337          * as marked by nfsl_inprog.
              * lp is non-NULL after the loops only when fnd was set: the inner
              * LIST_FOREACH leaves lp NULL whenever it runs to completion.
1338          */
1339         lp = NULL;
1340         fnd = 0;
1341         LIST_FOREACH(op, NFSCLOPENHASH(clp, np->n_fhp->nfh_fh,
1342             np->n_fhp->nfh_len), nfso_hash) {
1343                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1344                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1345                         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1346                                 if (lp->nfsl_inprog == NULL &&
1347                                     !NFSBCMP(lp->nfsl_owner, own,
1348                                      NFSV4CL_LOCKNAMELEN)) {
1349                                         fnd = 1;
1350                                         break;
1351                                 }
1352                         }
1353                 }
1354                 if (fnd)
1355                         break;
1356         }
1357
1358         if (lp != NULL) {
                     /* A non-zero return means an RPC is needed. */
1359                 ret = nfscl_updatelock(lp, &nlop, NULL, 0);
1360                 if (ret)
1361                         *dorpcp = 1;
1362                 /*
1363                  * Serial modifications on the lock owner for multiple
1364                  * threads for the same process using a read/write lock.
1365                  */
1366                 lp->nfsl_inprog = p;
1367                 nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1368                 *lpp = lp;
1369         }
1370         NFSUNLOCKCLSTATE();
             /* Free whichever pre-allocated locks are still set. */
1371         if (nlop)
1372                 free(nlop, M_NFSCLLOCK);
1373         if (other_lop)
1374                 free(other_lop, M_NFSCLLOCK);
1375         return (0);
1376 }
1377
1378 /*
1379  * Release all lockowners marked in progress for this process and file.
      *
      * The lockowner name is built from id and flags with
      * nfscl_filllockowner().  With the state mutex held, every open of the
      * file (matched by file handle) in the client's open hash bucket is
      * scanned; each lockowner with that name whose nfsl_inprog equals p has
      * nfsl_inprog cleared and its nfsl_rwlock released with
      * nfscl_lockunlock().  The client clp is then released via
      * nfscl_clrelease().
1380  */
1381 void
1382 nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p,
1383     void *id, int flags)
1384 {
1385         struct nfsclopen *op;
1386         struct nfscllockowner *lp;
1387         struct nfsnode *np;
1388         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1389
1390         np = VTONFS(vp);
1391         nfscl_filllockowner(id, own, flags);
1392         NFSLOCKCLSTATE();
1393         LIST_FOREACH(op, NFSCLOPENHASH(clp, np->n_fhp->nfh_fh,
1394             np->n_fhp->nfh_len), nfso_hash) {
1395                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1396                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1397                         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
                                     /* No break: release every match. */
1398                                 if (lp->nfsl_inprog == p &&
1399                                     !NFSBCMP(lp->nfsl_owner, own,
1400                                     NFSV4CL_LOCKNAMELEN)) {
1401                                         lp->nfsl_inprog = NULL;
1402                                         nfscl_lockunlock(&lp->nfsl_rwlock);
1403                                 }
1404                         }
1405                 }
1406         }
1407         nfscl_clrelease(clp);
1408         NFSUNLOCKCLSTATE();
1409 }
1410
1411 /*
1412  * Called to find out if any bytes within the byte range specified are
1413  * write locked by the calling process. Used to determine if flushing
1414  * is required before a LockU.
1415  * If in doubt, return 1, so the flush will occur.
1416  */
1417 int
1418 nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
1419     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
1420 {
1421         struct nfscllockowner *lp;
1422         struct nfsclopen *op;
1423         struct nfsclclient *clp;
1424         struct nfscllock *lop;
1425         struct nfscldeleg *dp;
1426         struct nfsnode *np;
1427         u_int64_t off, end;
1428         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1429         int error = 0;
1430
1431         np = VTONFS(vp);
1432         switch (fl->l_whence) {
1433         case SEEK_SET:
1434         case SEEK_CUR:
1435                 /*
1436                  * Caller is responsible for adding any necessary offset
1437                  * when SEEK_CUR is used.
1438                  */
1439                 off = fl->l_start;
1440                 break;
1441         case SEEK_END:
1442                 off = np->n_size + fl->l_start;
1443                 break;
1444         default:
1445                 return (1);
1446         }
1447         if (fl->l_len != 0) {
1448                 end = off + fl->l_len;
1449                 if (end < off)
1450                         return (1);
1451         } else {
1452                 end = NFS64BITSSET;
1453         }
1454
1455         error = nfscl_getcl(vp->v_mount, cred, p, false, &clp);
1456         if (error)
1457                 return (1);
1458         nfscl_filllockowner(id, own, flags);
1459         NFSLOCKCLSTATE();
1460
1461         /*
1462          * First check the delegation locks.
1463          */
1464         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
1465         if (dp != NULL) {
1466                 LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1467                         if (!NFSBCMP(lp->nfsl_owner, own,
1468                             NFSV4CL_LOCKNAMELEN))
1469                                 break;
1470                 }
1471                 if (lp != NULL) {
1472                         LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1473                                 if (lop->nfslo_first >= end)
1474                                         break;
1475                                 if (lop->nfslo_end <= off)
1476                                         continue;
1477                                 if (lop->nfslo_type == F_WRLCK) {
1478                                         nfscl_clrelease(clp);
1479                                         NFSUNLOCKCLSTATE();
1480                                         return (1);
1481                                 }
1482                         }
1483                 }
1484         }
1485
1486         /*
1487          * Now, check state against the server.
1488          */
1489         LIST_FOREACH(op, NFSCLOPENHASH(clp, np->n_fhp->nfh_fh,
1490             np->n_fhp->nfh_len), nfso_hash) {
1491                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1492                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1493                         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1494                                 if (!NFSBCMP(lp->nfsl_owner, own,
1495                                     NFSV4CL_LOCKNAMELEN))
1496                                         break;
1497                         }
1498                         if (lp != NULL) {
1499                                 LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1500                                         if (lop->nfslo_first >= end)
1501                                                 break;
1502                                         if (lop->nfslo_end <= off)
1503                                                 continue;
1504                                         if (lop->nfslo_type == F_WRLCK) {
1505                                                 nfscl_clrelease(clp);
1506                                                 NFSUNLOCKCLSTATE();
1507                                                 return (1);
1508                                         }
1509                                 }
1510                         }
1511                 }
1512         }
1513         nfscl_clrelease(clp);
1514         NFSUNLOCKCLSTATE();
1515         return (0);
1516 }
1517
1518 /*
1519  * Release a byte range lock owner structure.
1520  */
1521 void
1522 nfscl_lockrelease(struct nfscllockowner *lp, int error, int candelete)
1523 {
1524         struct nfsclclient *clp;
1525
1526         if (lp == NULL)
1527                 return;
1528         NFSLOCKCLSTATE();
1529         clp = lp->nfsl_open->nfso_own->nfsow_clp;
1530         if (error != 0 && candelete &&
1531             (lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED) == 0)
1532                 nfscl_freelockowner(lp, 0);
1533         else
1534                 nfscl_lockunlock(&lp->nfsl_rwlock);
1535         nfscl_clrelease(clp);
1536         NFSUNLOCKCLSTATE();
1537 }
1538
1539 /*
1540  * Free up an open structure and any associated byte range lock structures.
1541  */
1542 void
1543 nfscl_freeopen(struct nfsclopen *op, int local)
1544 {
1545
1546         LIST_REMOVE(op, nfso_list);
1547         if (op->nfso_hash.le_prev != NULL)
1548                 LIST_REMOVE(op, nfso_hash);
1549         nfscl_freealllocks(&op->nfso_lock, local);
1550         free(op, M_NFSCLOPEN);
1551         if (local)
1552                 nfsstatsv1.cllocalopens--;
1553         else
1554                 nfsstatsv1.clopens--;
1555 }
1556
1557 /*
1558  * Free up all lock owners and associated locks.
1559  */
1560 static void
1561 nfscl_freealllocks(struct nfscllockownerhead *lhp, int local)
1562 {
1563         struct nfscllockowner *lp, *nlp;
1564
1565         LIST_FOREACH_SAFE(lp, lhp, nfsl_list, nlp) {
1566                 if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1567                         panic("nfscllckw");
1568                 nfscl_freelockowner(lp, local);
1569         }
1570 }
1571
1572 /*
1573  * Called for an Open when NFSERR_EXPIRED is received from the server.
1574  * If there are no byte range locks nor a Share Deny lost, try to do a
1575  * fresh Open. Otherwise, free the open.
1576  */
1577 static int
1578 nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op,
1579     struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
1580 {
1581         struct nfscllockowner *lp;
1582         struct nfscldeleg *dp;
1583         int mustdelete = 0, error;
1584
1585         /*
1586          * Look for any byte range lock(s).
1587          */
1588         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1589                 if (!LIST_EMPTY(&lp->nfsl_lock)) {
1590                         mustdelete = 1;
1591                         break;
1592                 }
1593         }
1594
1595         /*
1596          * If no byte range lock(s) nor a Share deny, try to re-open.
1597          */
1598         if (!mustdelete && (op->nfso_mode & NFSLCK_DENYBITS) == 0) {
1599                 newnfs_copycred(&op->nfso_cred, cred);
1600                 dp = NULL;
1601                 error = nfsrpc_reopen(nmp, op->nfso_fh,
1602                     op->nfso_fhlen, op->nfso_mode, op, &dp, cred, p);
1603                 if (error) {
1604                         mustdelete = 1;
1605                         if (dp != NULL) {
1606                                 free(dp, M_NFSCLDELEG);
1607                                 dp = NULL;
1608                         }
1609                 }
1610                 if (dp != NULL)
1611                         nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh,
1612                             op->nfso_fhlen, cred, p, &dp);
1613         }
1614
1615         /*
1616          * If a byte range lock or Share deny or couldn't re-open, free it.
1617          */
1618         if (mustdelete)
1619                 nfscl_freeopen(op, 0);
1620         return (mustdelete);
1621 }
1622
1623 /*
1624  * Free up an open owner structure.
1625  */
1626 static void
1627 nfscl_freeopenowner(struct nfsclowner *owp, int local)
1628 {
1629
1630         LIST_REMOVE(owp, nfsow_list);
1631         free(owp, M_NFSCLOWNER);
1632         if (local)
1633                 nfsstatsv1.cllocalopenowners--;
1634         else
1635                 nfsstatsv1.clopenowners--;
1636 }
1637
1638 /*
1639  * Free up a byte range lock owner structure.
1640  */
1641 void
1642 nfscl_freelockowner(struct nfscllockowner *lp, int local)
1643 {
1644         struct nfscllock *lop, *nlop;
1645
1646         LIST_REMOVE(lp, nfsl_list);
1647         LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
1648                 nfscl_freelock(lop, local);
1649         }
1650         free(lp, M_NFSCLLOCKOWNER);
1651         if (local)
1652                 nfsstatsv1.cllocallockowners--;
1653         else
1654                 nfsstatsv1.cllockowners--;
1655 }
1656
1657 /*
1658  * Free up a byte range lock structure.
1659  */
1660 void
1661 nfscl_freelock(struct nfscllock *lop, int local)
1662 {
1663
1664         LIST_REMOVE(lop, nfslo_list);
1665         free(lop, M_NFSCLLOCK);
1666         if (local)
1667                 nfsstatsv1.cllocallocks--;
1668         else
1669                 nfsstatsv1.cllocks--;
1670 }
1671
1672 /*
1673  * Clean out the state related to a delegation.
1674  */
1675 static void
1676 nfscl_cleandeleg(struct nfscldeleg *dp)
1677 {
1678         struct nfsclowner *owp, *nowp;
1679         struct nfsclopen *op;
1680
1681         LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
1682                 op = LIST_FIRST(&owp->nfsow_open);
1683                 if (op != NULL) {
1684                         if (LIST_NEXT(op, nfso_list) != NULL)
1685                                 panic("nfscleandel");
1686                         nfscl_freeopen(op, 1);
1687                 }
1688                 nfscl_freeopenowner(owp, 1);
1689         }
1690         nfscl_freealllocks(&dp->nfsdl_lock, 1);
1691 }
1692
1693 /*
1694  * Free a delegation.
1695  */
1696 static void
1697 nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp, bool freeit)
1698 {
1699
1700         TAILQ_REMOVE(hdp, dp, nfsdl_list);
1701         LIST_REMOVE(dp, nfsdl_hash);
1702         if (freeit)
1703                 free(dp, M_NFSCLDELEG);
1704         nfsstatsv1.cldelegates--;
1705         nfscl_delegcnt--;
1706 }
1707
1708 /*
1709  * Free up all state related to this client structure.
1710  */
1711 static void
1712 nfscl_cleanclient(struct nfsclclient *clp)
1713 {
1714         struct nfsclowner *owp, *nowp;
1715         struct nfsclopen *op, *nop;
1716         struct nfscllayout *lyp, *nlyp;
1717         struct nfscldevinfo *dip, *ndip;
1718
1719         TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
1720                 nfscl_freelayout(lyp);
1721
1722         LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip)
1723                 nfscl_freedevinfo(dip);
1724
1725         /* Now, all the OpenOwners, etc. */
1726         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1727                 LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1728                         nfscl_freeopen(op, 0);
1729                 }
1730                 nfscl_freeopenowner(owp, 0);
1731         }
1732 }
1733
1734 /*
1735  * Called when an NFSERR_EXPIRED is received from the server.
1736  */
1737 static void
1738 nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp,
1739     struct ucred *cred, NFSPROC_T *p)
1740 {
1741         struct nfsclowner *owp, *nowp, *towp;
1742         struct nfsclopen *op, *nop, *top;
1743         struct nfscldeleg *dp, *ndp;
1744         int ret, printed = 0;
1745
1746         /*
1747          * First, merge locally issued Opens into the list for the server.
1748          */
1749         dp = TAILQ_FIRST(&clp->nfsc_deleg);
1750         while (dp != NULL) {
1751             ndp = TAILQ_NEXT(dp, nfsdl_list);
1752             owp = LIST_FIRST(&dp->nfsdl_owner);
1753             while (owp != NULL) {
1754                 nowp = LIST_NEXT(owp, nfsow_list);
1755                 op = LIST_FIRST(&owp->nfsow_open);
1756                 if (op != NULL) {
1757                     if (LIST_NEXT(op, nfso_list) != NULL)
1758                         panic("nfsclexp");
1759                     LIST_FOREACH(towp, &clp->nfsc_owner, nfsow_list) {
1760                         if (!NFSBCMP(towp->nfsow_owner, owp->nfsow_owner,
1761                             NFSV4CL_LOCKNAMELEN))
1762                             break;
1763                     }
1764                     if (towp != NULL) {
1765                         /* Merge opens in */
1766                         LIST_FOREACH(top, &towp->nfsow_open, nfso_list) {
1767                             if (top->nfso_fhlen == op->nfso_fhlen &&
1768                                 !NFSBCMP(top->nfso_fh, op->nfso_fh,
1769                                  op->nfso_fhlen)) {
1770                                 top->nfso_mode |= op->nfso_mode;
1771                                 top->nfso_opencnt += op->nfso_opencnt;
1772                                 break;
1773                             }
1774                         }
1775                         if (top == NULL) {
1776                             /* Just add the open to the owner list */
1777                             LIST_REMOVE(op, nfso_list);
1778                             op->nfso_own = towp;
1779                             LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list);
1780                             LIST_INSERT_HEAD(NFSCLOPENHASH(clp, op->nfso_fh,
1781                                 op->nfso_fhlen), op, nfso_hash);
1782                             nfsstatsv1.cllocalopens--;
1783                             nfsstatsv1.clopens++;
1784                         }
1785                     } else {
1786                         /* Just add the openowner to the client list */
1787                         LIST_REMOVE(owp, nfsow_list);
1788                         owp->nfsow_clp = clp;
1789                         LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list);
1790                         LIST_INSERT_HEAD(NFSCLOPENHASH(clp, op->nfso_fh,
1791                             op->nfso_fhlen), op, nfso_hash);
1792                         nfsstatsv1.cllocalopenowners--;
1793                         nfsstatsv1.clopenowners++;
1794                         nfsstatsv1.cllocalopens--;
1795                         nfsstatsv1.clopens++;
1796                     }
1797                 }
1798                 owp = nowp;
1799             }
1800             if (!printed && !LIST_EMPTY(&dp->nfsdl_lock)) {
1801                 printed = 1;
1802                 printf("nfsv4 expired locks lost\n");
1803             }
1804             nfscl_cleandeleg(dp);
1805             nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
1806             dp = ndp;
1807         }
1808         if (!TAILQ_EMPTY(&clp->nfsc_deleg))
1809             panic("nfsclexp");
1810
1811         /*
1812          * Now, try and reopen against the server.
1813          */
1814         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1815                 owp->nfsow_seqid = 0;
1816                 LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1817                         ret = nfscl_expireopen(clp, op, nmp, cred, p);
1818                         if (ret && !printed) {
1819                                 printed = 1;
1820                                 printf("nfsv4 expired locks lost\n");
1821                         }
1822                 }
1823                 if (LIST_EMPTY(&owp->nfsow_open))
1824                         nfscl_freeopenowner(owp, 0);
1825         }
1826 }
1827
1828 /*
1829  * This function must be called after the process represented by "own" has
1830  * exited. Must be called with CLSTATE lock held.
1831  */
1832 static void
1833 nfscl_cleanup_common(struct nfsclclient *clp, u_int8_t *own)
1834 {
1835         struct nfsclowner *owp, *nowp;
1836         struct nfscllockowner *lp, *nlp;
1837         struct nfscldeleg *dp;
1838
1839         /* First, get rid of local locks on delegations. */
1840         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1841                 LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1842                     if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
1843                         if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1844                             panic("nfscllckw");
1845                         nfscl_freelockowner(lp, 1);
1846                     }
1847                 }
1848         }
1849         owp = LIST_FIRST(&clp->nfsc_owner);
1850         while (owp != NULL) {
1851                 nowp = LIST_NEXT(owp, nfsow_list);
1852                 if (!NFSBCMP(owp->nfsow_owner, own,
1853                     NFSV4CL_LOCKNAMELEN)) {
1854                         /*
1855                          * If there are children that haven't closed the
1856                          * file descriptors yet, the opens will still be
1857                          * here. For that case, let the renew thread clear
1858                          * out the OpenOwner later.
1859                          */
1860                         if (LIST_EMPTY(&owp->nfsow_open))
1861                                 nfscl_freeopenowner(owp, 0);
1862                         else
1863                                 owp->nfsow_defunct = 1;
1864                 }
1865                 owp = nowp;
1866         }
1867 }
1868
1869 /*
1870  * Find open/lock owners for processes that have exited.
1871  */
1872 static void
1873 nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp)
1874 {
1875         struct nfsclowner *owp, *nowp;
1876         struct nfsclopen *op;
1877         struct nfscllockowner *lp, *nlp;
1878         struct nfscldeleg *dp;
1879
1880         /*
1881          * All the pidhash locks must be acquired, since they are sx locks
1882          * and must be acquired before the mutexes.  The pid(s) that will
1883          * be used aren't known yet, so all the locks need to be acquired.
1884          * Fortunately, this function is only performed once/sec.
1885          */
1886         pidhash_slockall();
1887         NFSLOCKCLSTATE();
1888         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1889                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1890                         LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) {
1891                                 if (LIST_EMPTY(&lp->nfsl_lock))
1892                                         nfscl_emptylockowner(lp, lhp);
1893                         }
1894                 }
1895                 if (nfscl_procdoesntexist(owp->nfsow_owner))
1896                         nfscl_cleanup_common(clp, owp->nfsow_owner);
1897         }
1898
1899         /*
1900          * For the single open_owner case, these lock owners need to be
1901          * checked to see if they still exist separately.
1902          * This is because nfscl_procdoesntexist() never returns true for
1903          * the single open_owner so that the above doesn't ever call
1904          * nfscl_cleanup_common().
1905          */
1906         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1907                 LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1908                         if (nfscl_procdoesntexist(lp->nfsl_owner))
1909                                 nfscl_cleanup_common(clp, lp->nfsl_owner);
1910                 }
1911         }
1912         NFSUNLOCKCLSTATE();
1913         pidhash_sunlockall();
1914 }
1915
1916 /*
1917  * Take the empty lock owner and move it to the local lhp list if the
1918  * associated process no longer exists.
1919  */
1920 static void
1921 nfscl_emptylockowner(struct nfscllockowner *lp,
1922     struct nfscllockownerfhhead *lhp)
1923 {
1924         struct nfscllockownerfh *lfhp, *mylfhp;
1925         struct nfscllockowner *nlp;
1926         int fnd_it;
1927
1928         /* If not a Posix lock owner, just return. */
1929         if ((lp->nfsl_lockflags & F_POSIX) == 0)
1930                 return;
1931
1932         fnd_it = 0;
1933         mylfhp = NULL;
1934         /*
1935          * First, search to see if this lock owner is already in the list.
1936          * If it is, then the associated process no longer exists.
1937          */
1938         SLIST_FOREACH(lfhp, lhp, nfslfh_list) {
1939                 if (lfhp->nfslfh_len == lp->nfsl_open->nfso_fhlen &&
1940                     !NFSBCMP(lfhp->nfslfh_fh, lp->nfsl_open->nfso_fh,
1941                     lfhp->nfslfh_len))
1942                         mylfhp = lfhp;
1943                 LIST_FOREACH(nlp, &lfhp->nfslfh_lock, nfsl_list)
1944                         if (!NFSBCMP(nlp->nfsl_owner, lp->nfsl_owner,
1945                             NFSV4CL_LOCKNAMELEN))
1946                                 fnd_it = 1;
1947         }
1948         /* If not found, check if process still exists. */
1949         if (fnd_it == 0 && nfscl_procdoesntexist(lp->nfsl_owner) == 0)
1950                 return;
1951
1952         /* Move the lock owner over to the local list. */
1953         if (mylfhp == NULL) {
1954                 mylfhp = malloc(sizeof(struct nfscllockownerfh), M_TEMP,
1955                     M_NOWAIT);
1956                 if (mylfhp == NULL)
1957                         return;
1958                 mylfhp->nfslfh_len = lp->nfsl_open->nfso_fhlen;
1959                 NFSBCOPY(lp->nfsl_open->nfso_fh, mylfhp->nfslfh_fh,
1960                     mylfhp->nfslfh_len);
1961                 LIST_INIT(&mylfhp->nfslfh_lock);
1962                 SLIST_INSERT_HEAD(lhp, mylfhp, nfslfh_list);
1963         }
1964         LIST_REMOVE(lp, nfsl_list);
1965         LIST_INSERT_HEAD(&mylfhp->nfslfh_lock, lp, nfsl_list);
1966 }
1967
1968 static int      fake_global;    /* Used to force visibility of MNTK_UNMOUNTF */
1969 /*
1970  * Called from nfs umount to free up the clientid.
1971  */
1972 void
1973 nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p)
1974 {
1975         struct nfsclclient *clp;
1976         struct ucred *cred;
1977         int igotlock;
1978
1979         /*
1980          * For the case that matters, this is the thread that set
1981          * MNTK_UNMOUNTF, so it will see it set. The code that follows is
1982          * done to ensure that any thread executing nfscl_getcl() after
1983          * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the
1984          * mutex for NFSLOCKCLSTATE(), so it is "m" for the following
1985          * explanation, courtesy of Alan Cox.
1986          * What follows is a snippet from Alan Cox's email at:
1987          * https://docs.FreeBSD.org/cgi/mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw
1988          * 
1989          * 1. Set MNTK_UNMOUNTF
1990          * 2. Acquire a standard FreeBSD mutex "m".
1991          * 3. Update some data structures.
1992          * 4. Release mutex "m".
1993          * 
1994          * Then, other threads that acquire "m" after step 4 has occurred will
1995          * see MNTK_UNMOUNTF as set.  But, other threads that beat thread X to
1996          * step 2 may or may not see MNTK_UNMOUNTF as set.
1997          */
1998         NFSLOCKCLSTATE();
1999         if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
2000                 fake_global++;
2001                 NFSUNLOCKCLSTATE();
2002                 NFSLOCKCLSTATE();
2003         }
2004
2005         clp = nmp->nm_clp;
2006         if (clp != NULL) {
2007                 if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0)
2008                         panic("nfscl umount");
2009
2010                 /*
2011                  * First, handshake with the nfscl renew thread, to terminate
2012                  * it.
2013                  */
2014                 clp->nfsc_flags |= NFSCLFLAGS_UMOUNT;
2015                 while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD)
2016                         (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT,
2017                             "nfsclumnt", hz);
2018
2019                 /*
2020                  * Now, get the exclusive lock on the client state, so
2021                  * that no uses of the state are still in progress.
2022                  */
2023                 do {
2024                         igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2025                             NFSCLSTATEMUTEXPTR, NULL);
2026                 } while (!igotlock);
2027                 NFSUNLOCKCLSTATE();
2028
2029                 /*
2030                  * Free up all the state. It will expire on the server, but
2031                  * maybe we should do a SetClientId/SetClientIdConfirm so
2032                  * the server throws it away?
2033                  */
2034                 LIST_REMOVE(clp, nfsc_list);
2035                 nfscl_delegreturnall(clp, p);
2036                 cred = newnfs_getcred();
2037                 if (NFSHASNFSV4N(nmp)) {
2038                         (void)nfsrpc_destroysession(nmp, clp, cred, p);
2039                         (void)nfsrpc_destroyclient(nmp, clp, cred, p);
2040                 } else
2041                         (void)nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
2042                 nfscl_cleanclient(clp);
2043                 nmp->nm_clp = NULL;
2044                 NFSFREECRED(cred);
2045                 free(clp, M_NFSCLCLIENT);
2046         } else
2047                 NFSUNLOCKCLSTATE();
2048 }
2049
2050 /*
2051  * This function is called when a server replies with NFSERR_STALECLIENTID,
2052  * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists,
2053  * doing Opens and Locks with reclaim. If these fail, it deletes the
2054  * corresponding state.
2055  */
2056 static void
2057 nfscl_recover(struct nfsclclient *clp, bool *retokp, struct ucred *cred,
2058     NFSPROC_T *p)
2059 {
2060         struct nfsclowner *owp, *nowp;
2061         struct nfsclopen *op, *nop;
2062         struct nfscllockowner *lp, *nlp;
2063         struct nfscllock *lop, *nlop;
2064         struct nfscldeleg *dp, *ndp, *tdp;
2065         struct nfsmount *nmp;
2066         struct ucred *tcred;
2067         struct nfsclopenhead extra_open;
2068         struct nfscldeleghead extra_deleg;
2069         struct nfsreq *rep;
2070         u_int64_t len;
2071         u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode;
2072         int i, igotlock = 0, error, trycnt, firstlock;
2073         struct nfscllayout *lyp, *nlyp;
2074         bool recovered_one;
2075
2076         /*
2077          * First, lock the client structure, so everyone else will
2078          * block when trying to use state.
2079          */
2080         NFSLOCKCLSTATE();
2081         clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2082         do {
2083                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2084                     NFSCLSTATEMUTEXPTR, NULL);
2085         } while (!igotlock);
2086         NFSUNLOCKCLSTATE();
2087
2088         nmp = clp->nfsc_nmp;
2089         if (nmp == NULL)
2090                 panic("nfscl recover");
2091
2092         /*
2093          * For now, just get rid of all layouts. There may be a need
2094          * to do LayoutCommit Ops with reclaim == true later.
2095          */
2096         TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
2097                 nfscl_freelayout(lyp);
2098         TAILQ_INIT(&clp->nfsc_layout);
2099         for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
2100                 LIST_INIT(&clp->nfsc_layouthash[i]);
2101
2102         trycnt = 5;
2103         tcred = NULL;
2104         do {
2105                 error = nfsrpc_setclient(nmp, clp, 1, retokp, cred, p);
2106         } while ((error == NFSERR_STALECLIENTID ||
2107              error == NFSERR_BADSESSION ||
2108              error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2109         if (error) {
2110                 NFSLOCKCLSTATE();
2111                 clp->nfsc_flags &= ~(NFSCLFLAGS_RECOVER |
2112                     NFSCLFLAGS_RECVRINPROG);
2113                 wakeup(&clp->nfsc_flags);
2114                 nfsv4_unlock(&clp->nfsc_lock, 0);
2115                 NFSUNLOCKCLSTATE();
2116                 return;
2117         }
2118         clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2119         clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2120
2121         /*
2122          * Mark requests already queued on the server, so that they don't
2123          * initiate another recovery cycle. Any requests already in the
2124          * queue that handle state information will have the old stale
2125          * clientid/stateid and will get a NFSERR_STALESTATEID,
2126          * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server.
2127          * This will be translated to NFSERR_STALEDONTRECOVER when
2128          * R_DONTRECOVER is set.
2129          */
2130         NFSLOCKREQ();
2131         TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) {
2132                 if (rep->r_nmp == nmp)
2133                         rep->r_flags |= R_DONTRECOVER;
2134         }
2135         NFSUNLOCKREQ();
2136
2137         /*
2138          * If nfsrpc_setclient() returns *retokp == true,
2139          * no more recovery is needed.
2140          */
2141         if (*retokp)
2142                 goto out;
2143
2144         /*
2145          * Now, mark all delegations "need reclaim".
2146          */
2147         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list)
2148                 dp->nfsdl_flags |= NFSCLDL_NEEDRECLAIM;
2149
2150         TAILQ_INIT(&extra_deleg);
2151         LIST_INIT(&extra_open);
2152         /*
2153          * Now traverse the state lists, doing Open and Lock Reclaims.
2154          */
2155         tcred = newnfs_getcred();
2156         recovered_one = false;
2157         owp = LIST_FIRST(&clp->nfsc_owner);
2158         while (owp != NULL) {
2159             nowp = LIST_NEXT(owp, nfsow_list);
2160             owp->nfsow_seqid = 0;
2161             op = LIST_FIRST(&owp->nfsow_open);
2162             while (op != NULL) {
2163                 nop = LIST_NEXT(op, nfso_list);
2164                 if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2165                     /* Search for a delegation to reclaim with the open */
2166                     TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2167                         if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2168                             continue;
2169                         if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2170                             mode = NFSV4OPEN_ACCESSWRITE;
2171                             delegtype = NFSV4OPEN_DELEGATEWRITE;
2172                         } else {
2173                             mode = NFSV4OPEN_ACCESSREAD;
2174                             delegtype = NFSV4OPEN_DELEGATEREAD;
2175                         }
2176                         if ((op->nfso_mode & mode) == mode &&
2177                             op->nfso_fhlen == dp->nfsdl_fhlen &&
2178                             !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen))
2179                             break;
2180                     }
2181                     ndp = dp;
2182                     if (dp == NULL)
2183                         delegtype = NFSV4OPEN_DELEGATENONE;
2184                     newnfs_copycred(&op->nfso_cred, tcred);
2185                     error = nfscl_tryopen(nmp, NULL, op->nfso_fh,
2186                         op->nfso_fhlen, op->nfso_fh, op->nfso_fhlen,
2187                         op->nfso_mode, op, NULL, 0, &ndp, 1, delegtype,
2188                         tcred, p);
2189                     if (!error) {
2190                         recovered_one = true;
2191                         /* Handle any replied delegation */
2192                         if (ndp != NULL && ((ndp->nfsdl_flags & NFSCLDL_WRITE)
2193                             || NFSMNT_RDONLY(nmp->nm_mountp))) {
2194                             if ((ndp->nfsdl_flags & NFSCLDL_WRITE))
2195                                 mode = NFSV4OPEN_ACCESSWRITE;
2196                             else
2197                                 mode = NFSV4OPEN_ACCESSREAD;
2198                             TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2199                                 if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2200                                     continue;
2201                                 if ((op->nfso_mode & mode) == mode &&
2202                                     op->nfso_fhlen == dp->nfsdl_fhlen &&
2203                                     !NFSBCMP(op->nfso_fh, dp->nfsdl_fh,
2204                                     op->nfso_fhlen)) {
2205                                     dp->nfsdl_stateid = ndp->nfsdl_stateid;
2206                                     dp->nfsdl_sizelimit = ndp->nfsdl_sizelimit;
2207                                     dp->nfsdl_ace = ndp->nfsdl_ace;
2208                                     dp->nfsdl_change = ndp->nfsdl_change;
2209                                     dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2210                                     if ((ndp->nfsdl_flags & NFSCLDL_RECALL))
2211                                         dp->nfsdl_flags |= NFSCLDL_RECALL;
2212                                     free(ndp, M_NFSCLDELEG);
2213                                     ndp = NULL;
2214                                     break;
2215                                 }
2216                             }
2217                         }
2218                         if (ndp != NULL)
2219                             TAILQ_INSERT_HEAD(&extra_deleg, ndp, nfsdl_list);
2220
2221                         /* and reclaim all byte range locks */
2222                         lp = LIST_FIRST(&op->nfso_lock);
2223                         while (lp != NULL) {
2224                             nlp = LIST_NEXT(lp, nfsl_list);
2225                             lp->nfsl_seqid = 0;
2226                             firstlock = 1;
2227                             lop = LIST_FIRST(&lp->nfsl_lock);
2228                             while (lop != NULL) {
2229                                 nlop = LIST_NEXT(lop, nfslo_list);
2230                                 if (lop->nfslo_end == NFS64BITSSET)
2231                                     len = NFS64BITSSET;
2232                                 else
2233                                     len = lop->nfslo_end - lop->nfslo_first;
2234                                 error = nfscl_trylock(nmp, NULL,
2235                                     op->nfso_fh, op->nfso_fhlen, lp,
2236                                     firstlock, 1, lop->nfslo_first, len,
2237                                     lop->nfslo_type, tcred, p);
2238                                 if (error != 0)
2239                                     nfscl_freelock(lop, 0);
2240                                 else
2241                                     firstlock = 0;
2242                                 lop = nlop;
2243                             }
2244                             /* If no locks, but a lockowner, just delete it. */
2245                             if (LIST_EMPTY(&lp->nfsl_lock))
2246                                 nfscl_freelockowner(lp, 0);
2247                             lp = nlp;
2248                         }
2249                     } else if (error == NFSERR_NOGRACE && !recovered_one &&
2250                         NFSHASNFSV4N(nmp)) {
2251                         /*
2252                          * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
2253                          * actually end up here, since the client will do
2254                          * a recovery for NFSERR_BADSESSION, but will get
2255                          * an NFSERR_NOGRACE reply for the first "reclaim"
2256                          * attempt.
2257                          * So, call nfscl_expireclient() to recover the
2258                          * opens as best we can and then do a reclaim
2259                          * complete and return.
2260                          */
2261                         nfsrpc_reclaimcomplete(nmp, cred, p);
2262                         nfscl_expireclient(clp, nmp, tcred, p);
2263                         goto out;
2264                     }
2265                 }
2266                 if (error != 0 && error != NFSERR_BADSESSION)
2267                     nfscl_freeopen(op, 0);
2268                 op = nop;
2269             }
2270             owp = nowp;
2271         }
2272
2273         /*
2274          * Now, try and get any delegations not yet reclaimed by cobbling
2275          * to-gether an appropriate open.
2276          */
2277         nowp = NULL;
2278         dp = TAILQ_FIRST(&clp->nfsc_deleg);
2279         while (dp != NULL) {
2280             ndp = TAILQ_NEXT(dp, nfsdl_list);
2281             if ((dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) {
2282                 if (nowp == NULL) {
2283                     nowp = malloc(
2284                         sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK);
2285                     /*
2286                      * Name must be as long an largest possible
2287                      * NFSV4CL_LOCKNAMELEN. 12 for now.
2288                      */
2289                     NFSBCOPY("RECLAIMDELEG", nowp->nfsow_owner,
2290                         NFSV4CL_LOCKNAMELEN);
2291                     LIST_INIT(&nowp->nfsow_open);
2292                     nowp->nfsow_clp = clp;
2293                     nowp->nfsow_seqid = 0;
2294                     nowp->nfsow_defunct = 0;
2295                     nfscl_lockinit(&nowp->nfsow_rwlock);
2296                 }
2297                 nop = NULL;
2298                 if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2299                     nop = malloc(sizeof (struct nfsclopen) +
2300                         dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
2301                     nop->nfso_own = nowp;
2302                     if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2303                         nop->nfso_mode = NFSV4OPEN_ACCESSWRITE;
2304                         delegtype = NFSV4OPEN_DELEGATEWRITE;
2305                     } else {
2306                         nop->nfso_mode = NFSV4OPEN_ACCESSREAD;
2307                         delegtype = NFSV4OPEN_DELEGATEREAD;
2308                     }
2309                     nop->nfso_opencnt = 0;
2310                     nop->nfso_posixlock = 1;
2311                     nop->nfso_fhlen = dp->nfsdl_fhlen;
2312                     NFSBCOPY(dp->nfsdl_fh, nop->nfso_fh, dp->nfsdl_fhlen);
2313                     LIST_INIT(&nop->nfso_lock);
2314                     nop->nfso_stateid.seqid = 0;
2315                     nop->nfso_stateid.other[0] = 0;
2316                     nop->nfso_stateid.other[1] = 0;
2317                     nop->nfso_stateid.other[2] = 0;
2318                     newnfs_copycred(&dp->nfsdl_cred, tcred);
2319                     newnfs_copyincred(tcred, &nop->nfso_cred);
2320                     tdp = NULL;
2321                     error = nfscl_tryopen(nmp, NULL, nop->nfso_fh,
2322                         nop->nfso_fhlen, nop->nfso_fh, nop->nfso_fhlen,
2323                         nop->nfso_mode, nop, NULL, 0, &tdp, 1,
2324                         delegtype, tcred, p);
2325                     if (tdp != NULL) {
2326                         if ((tdp->nfsdl_flags & NFSCLDL_WRITE))
2327                             mode = NFSV4OPEN_ACCESSWRITE;
2328                         else
2329                             mode = NFSV4OPEN_ACCESSREAD;
2330                         if ((nop->nfso_mode & mode) == mode &&
2331                             nop->nfso_fhlen == tdp->nfsdl_fhlen &&
2332                             !NFSBCMP(nop->nfso_fh, tdp->nfsdl_fh,
2333                             nop->nfso_fhlen)) {
2334                             dp->nfsdl_stateid = tdp->nfsdl_stateid;
2335                             dp->nfsdl_sizelimit = tdp->nfsdl_sizelimit;
2336                             dp->nfsdl_ace = tdp->nfsdl_ace;
2337                             dp->nfsdl_change = tdp->nfsdl_change;
2338                             dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2339                             if ((tdp->nfsdl_flags & NFSCLDL_RECALL))
2340                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
2341                             free(tdp, M_NFSCLDELEG);
2342                         } else {
2343                             TAILQ_INSERT_HEAD(&extra_deleg, tdp, nfsdl_list);
2344                         }
2345                     }
2346                 }
2347                 if (error) {
2348                     if (nop != NULL)
2349                         free(nop, M_NFSCLOPEN);
2350                     if (error == NFSERR_NOGRACE && !recovered_one &&
2351                         NFSHASNFSV4N(nmp)) {
2352                         /*
2353                          * For NFSv4.1/4.2, the NFSERR_EXPIRED case will
2354                          * actually end up here, since the client will do
2355                          * a recovery for NFSERR_BADSESSION, but will get
2356                          * an NFSERR_NOGRACE reply for the first "reclaim"
2357                          * attempt.
2358                          * So, call nfscl_expireclient() to recover the
2359                          * opens as best we can and then do a reclaim
2360                          * complete and return.
2361                          */
2362                         nfsrpc_reclaimcomplete(nmp, cred, p);
2363                         nfscl_expireclient(clp, nmp, tcred, p);
2364                         free(nowp, M_NFSCLOWNER);
2365                         goto out;
2366                     }
2367                     /*
2368                      * Couldn't reclaim it, so throw the state
2369                      * away. Ouch!!
2370                      */
2371                     nfscl_cleandeleg(dp);
2372                     nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
2373                 } else {
2374                     recovered_one = true;
2375                     LIST_INSERT_HEAD(&extra_open, nop, nfso_list);
2376                 }
2377             }
2378             dp = ndp;
2379         }
2380
2381         /*
2382          * Now, get rid of extra Opens and Delegations.
2383          */
2384         LIST_FOREACH_SAFE(op, &extra_open, nfso_list, nop) {
2385                 do {
2386                         newnfs_copycred(&op->nfso_cred, tcred);
2387                         error = nfscl_tryclose(op, tcred, nmp, p);
2388                         if (error == NFSERR_GRACE)
2389                                 (void) nfs_catnap(PZERO, error, "nfsexcls");
2390                 } while (error == NFSERR_GRACE);
2391                 LIST_REMOVE(op, nfso_list);
2392                 free(op, M_NFSCLOPEN);
2393         }
2394         if (nowp != NULL)
2395                 free(nowp, M_NFSCLOWNER);
2396
2397         TAILQ_FOREACH_SAFE(dp, &extra_deleg, nfsdl_list, ndp) {
2398                 do {
2399                         newnfs_copycred(&dp->nfsdl_cred, tcred);
2400                         error = nfscl_trydelegreturn(dp, tcred, nmp, p);
2401                         if (error == NFSERR_GRACE)
2402                                 (void) nfs_catnap(PZERO, error, "nfsexdlg");
2403                 } while (error == NFSERR_GRACE);
2404                 TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list);
2405                 free(dp, M_NFSCLDELEG);
2406         }
2407
2408         /* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */
2409         if (NFSHASNFSV4N(nmp))
2410                 (void)nfsrpc_reclaimcomplete(nmp, cred, p);
2411
2412 out:
2413         NFSLOCKCLSTATE();
2414         clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG;
2415         wakeup(&clp->nfsc_flags);
2416         nfsv4_unlock(&clp->nfsc_lock, 0);
2417         NFSUNLOCKCLSTATE();
2418         if (tcred != NULL)
2419                 NFSFREECRED(tcred);
2420 }
2421
2422 /*
2423  * This function is called when a server replies with NFSERR_EXPIRED.
2424  * It deletes all state for the client and does a fresh SetClientId/confirm.
2425  * XXX Someday it should post a signal to the process(es) that hold the
2426  * state, so they know that lock state has been lost.
2427  */
2428 int
2429 nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p)
2430 {
2431         struct nfsmount *nmp;
2432         struct ucred *cred;
2433         int igotlock = 0, error, trycnt;
2434
2435         /*
2436          * If the clientid has gone away or a new SetClientid has already
2437          * been done, just return ok.
2438          */
2439         if (clp == NULL || clidrev != clp->nfsc_clientidrev)
2440                 return (0);
2441
2442         /*
2443          * First, lock the client structure, so everyone else will
2444          * block when trying to use state. Also, use NFSCLFLAGS_EXPIREIT so
2445          * that only one thread does the work.
2446          */
2447         NFSLOCKCLSTATE();
2448         clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT;
2449         do {
2450                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2451                     NFSCLSTATEMUTEXPTR, NULL);
2452         } while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT));
2453         if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) {
2454                 if (igotlock)
2455                         nfsv4_unlock(&clp->nfsc_lock, 0);
2456                 NFSUNLOCKCLSTATE();
2457                 return (0);
2458         }
2459         clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2460         NFSUNLOCKCLSTATE();
2461
2462         nmp = clp->nfsc_nmp;
2463         if (nmp == NULL)
2464                 panic("nfscl expired");
2465         cred = newnfs_getcred();
2466         trycnt = 5;
2467         do {
2468                 error = nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
2469         } while ((error == NFSERR_STALECLIENTID ||
2470              error == NFSERR_BADSESSION ||
2471              error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2472         if (error) {
2473                 NFSLOCKCLSTATE();
2474                 clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2475         } else {
2476                 /*
2477                  * Expire the state for the client.
2478                  */
2479                 nfscl_expireclient(clp, nmp, cred, p);
2480                 NFSLOCKCLSTATE();
2481                 clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2482                 clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2483         }
2484         clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG);
2485         wakeup(&clp->nfsc_flags);
2486         nfsv4_unlock(&clp->nfsc_lock, 0);
2487         NFSUNLOCKCLSTATE();
2488         NFSFREECRED(cred);
2489         return (error);
2490 }
2491
2492 /*
2493  * This function inserts a lock in the list after insert_lop.
2494  */
2495 static void
2496 nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop,
2497     struct nfscllock *insert_lop, int local)
2498 {
2499
2500         if ((struct nfscllockowner *)insert_lop == lp)
2501                 LIST_INSERT_HEAD(&lp->nfsl_lock, new_lop, nfslo_list);
2502         else
2503                 LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list);
2504         if (local)
2505                 nfsstatsv1.cllocallocks++;
2506         else
2507                 nfsstatsv1.cllocks++;
2508 }
2509
2510 /*
2511  * This function updates the locking for a lock owner and given file. It
2512  * maintains a list of lock ranges ordered on increasing file offset that
2513  * are NFSCLLOCK_READ or NFSCLLOCK_WRITE and non-overlapping (aka POSIX style).
2514  * It always adds new_lop to the list and sometimes uses the one pointed
2515  * at by other_lopp.
2516  * Returns 1 if the locks were modified, 0 otherwise.
2517  */
2518 static int
2519 nfscl_updatelock(struct nfscllockowner *lp, struct nfscllock **new_lopp,
2520     struct nfscllock **other_lopp, int local)
2521 {
2522         struct nfscllock *new_lop = *new_lopp;
2523         struct nfscllock *lop, *tlop, *ilop;
2524         struct nfscllock *other_lop;
2525         int unlock = 0, modified = 0;
2526         u_int64_t tmp;
2527
2528         /*
2529          * Work down the list until the lock is merged.
2530          */
2531         if (new_lop->nfslo_type == F_UNLCK)
2532                 unlock = 1;
2533         ilop = (struct nfscllock *)lp;
2534         lop = LIST_FIRST(&lp->nfsl_lock);
2535         while (lop != NULL) {
2536             /*
2537              * Only check locks for this file that aren't before the start of
2538              * new lock's range.
2539              */
2540             if (lop->nfslo_end >= new_lop->nfslo_first) {
2541                 if (new_lop->nfslo_end < lop->nfslo_first) {
2542                     /*
2543                      * If the new lock ends before the start of the
2544                      * current lock's range, no merge, just insert
2545                      * the new lock.
2546                      */
2547                     break;
2548                 }
2549                 if (new_lop->nfslo_type == lop->nfslo_type ||
2550                     (new_lop->nfslo_first <= lop->nfslo_first &&
2551                      new_lop->nfslo_end >= lop->nfslo_end)) {
2552                     /*
2553                      * This lock can be absorbed by the new lock/unlock.
2554                      * This happens when it covers the entire range
2555                      * of the old lock or is contiguous
2556                      * with the old lock and is of the same type or an
2557                      * unlock.
2558                      */
2559                     if (new_lop->nfslo_type != lop->nfslo_type ||
2560                         new_lop->nfslo_first != lop->nfslo_first ||
2561                         new_lop->nfslo_end != lop->nfslo_end)
2562                         modified = 1;
2563                     if (lop->nfslo_first < new_lop->nfslo_first)
2564                         new_lop->nfslo_first = lop->nfslo_first;
2565                     if (lop->nfslo_end > new_lop->nfslo_end)
2566                         new_lop->nfslo_end = lop->nfslo_end;
2567                     tlop = lop;
2568                     lop = LIST_NEXT(lop, nfslo_list);
2569                     nfscl_freelock(tlop, local);
2570                     continue;
2571                 }
2572
2573                 /*
2574                  * All these cases are for contiguous locks that are not the
2575                  * same type, so they can't be merged.
2576                  */
2577                 if (new_lop->nfslo_first <= lop->nfslo_first) {
2578                     /*
2579                      * This case is where the new lock overlaps with the
2580                      * first part of the old lock. Move the start of the
2581                      * old lock to just past the end of the new lock. The
2582                      * new lock will be inserted in front of the old, since
2583                      * ilop hasn't been updated. (We are done now.)
2584                      */
2585                     if (lop->nfslo_first != new_lop->nfslo_end) {
2586                         lop->nfslo_first = new_lop->nfslo_end;
2587                         modified = 1;
2588                     }
2589                     break;
2590                 }
2591                 if (new_lop->nfslo_end >= lop->nfslo_end) {
2592                     /*
2593                      * This case is where the new lock overlaps with the
2594                      * end of the old lock's range. Move the old lock's
2595                      * end to just before the new lock's first and insert
2596                      * the new lock after the old lock.
2597                      * Might not be done yet, since the new lock could
2598                      * overlap further locks with higher ranges.
2599                      */
2600                     if (lop->nfslo_end != new_lop->nfslo_first) {
2601                         lop->nfslo_end = new_lop->nfslo_first;
2602                         modified = 1;
2603                     }
2604                     ilop = lop;
2605                     lop = LIST_NEXT(lop, nfslo_list);
2606                     continue;
2607                 }
2608                 /*
2609                  * The final case is where the new lock's range is in the
2610                  * middle of the current lock's and splits the current lock
2611                  * up. Use *other_lopp to handle the second part of the
2612                  * split old lock range. (We are done now.)
2613                  * For unlock, we use new_lop as other_lop and tmp, since
2614                  * other_lop and new_lop are the same for this case.
2615                  * We noted the unlock case above, so we don't need
2616                  * new_lop->nfslo_type any longer.
2617                  */
2618                 tmp = new_lop->nfslo_first;
2619                 if (unlock) {
2620                     other_lop = new_lop;
2621                     *new_lopp = NULL;
2622                 } else {
2623                     other_lop = *other_lopp;
2624                     *other_lopp = NULL;
2625                 }
2626                 other_lop->nfslo_first = new_lop->nfslo_end;
2627                 other_lop->nfslo_end = lop->nfslo_end;
2628                 other_lop->nfslo_type = lop->nfslo_type;
2629                 lop->nfslo_end = tmp;
2630                 nfscl_insertlock(lp, other_lop, lop, local);
2631                 ilop = lop;
2632                 modified = 1;
2633                 break;
2634             }
2635             ilop = lop;
2636             lop = LIST_NEXT(lop, nfslo_list);
2637             if (lop == NULL)
2638                 break;
2639         }
2640
2641         /*
2642          * Insert the new lock in the list at the appropriate place.
2643          */
2644         if (!unlock) {
2645                 nfscl_insertlock(lp, new_lop, ilop, local);
2646                 *new_lopp = NULL;
2647                 modified = 1;
2648         }
2649         return (modified);
2650 }
2651
2652 /*
2653  * This function must be run as a kernel thread.
2654  * It does Renew Ops and recovery, when required.
2655  */
2656 void
2657 nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p)
2658 {
2659         struct nfsclowner *owp, *nowp;
2660         struct nfsclopen *op;
2661         struct nfscllockowner *lp, *nlp;
2662         struct nfscldeleghead dh;
2663         struct nfscldeleg *dp, *ndp;
2664         struct ucred *cred;
2665         u_int32_t clidrev;
2666         int error, cbpathdown, islept, igotlock, ret, clearok;
2667         uint32_t recover_done_time = 0;
2668         time_t mytime;
2669         static time_t prevsec = 0;
2670         struct nfscllockownerfh *lfhp, *nlfhp;
2671         struct nfscllockownerfhhead lfh;
2672         struct nfscllayout *lyp, *nlyp;
2673         struct nfscldevinfo *dip, *ndip;
2674         struct nfscllayouthead rlh;
2675         struct nfsclrecalllayout *recallp;
2676         struct nfsclds *dsp;
2677         bool retok;
2678         struct mount *mp;
2679         vnode_t vp;
2680
2681         cred = newnfs_getcred();
2682         NFSLOCKCLSTATE();
2683         clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD;
2684         mp = clp->nfsc_nmp->nm_mountp;
2685         NFSUNLOCKCLSTATE();
2686         for(;;) {
2687                 newnfs_setroot(cred);
2688                 cbpathdown = 0;
2689                 if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) {
2690                         /*
2691                          * Only allow one full recover within 1/2 of the lease
2692                          * duration (nfsc_renew).
2693                          * retok is value/result.  If passed in set to true,
2694                          * it indicates only a CreateSession operation should
2695                          * be attempted.
2696                          * If it is returned true, it indicates that the
2697                          * recovery only required a CreateSession.
2698                          */
2699                         retok = true;
2700                         if (recover_done_time < NFSD_MONOSEC) {
2701                                 recover_done_time = NFSD_MONOSEC +
2702                                     clp->nfsc_renew;
2703                                 retok = false;
2704                         }
2705                         NFSCL_DEBUG(1, "Doing recovery, only "
2706                             "createsession=%d\n", retok);
2707                         nfscl_recover(clp, &retok, cred, p);
2708                 }
2709                 if (clp->nfsc_expire <= NFSD_MONOSEC &&
2710                     (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) {
2711                         clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
2712                         clidrev = clp->nfsc_clientidrev;
2713                         error = nfsrpc_renew(clp, NULL, cred, p);
2714                         if (error == NFSERR_CBPATHDOWN)
2715                             cbpathdown = 1;
2716                         else if (error == NFSERR_STALECLIENTID ||
2717                             error == NFSERR_BADSESSION) {
2718                             NFSLOCKCLSTATE();
2719                             clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2720                             NFSUNLOCKCLSTATE();
2721                         } else if (error == NFSERR_EXPIRED)
2722                             (void) nfscl_hasexpired(clp, clidrev, p);
2723                 }
2724
2725 checkdsrenew:
2726                 if (NFSHASNFSV4N(clp->nfsc_nmp)) {
2727                         /* Do renews for any DS sessions. */
2728                         NFSLOCKMNT(clp->nfsc_nmp);
2729                         /* Skip first entry, since the MDS is handled above. */
2730                         dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess);
2731                         if (dsp != NULL)
2732                                 dsp = TAILQ_NEXT(dsp, nfsclds_list);
2733                         while (dsp != NULL) {
2734                                 if (dsp->nfsclds_expire <= NFSD_MONOSEC &&
2735                                     dsp->nfsclds_sess.nfsess_defunct == 0) {
2736                                         dsp->nfsclds_expire = NFSD_MONOSEC +
2737                                             clp->nfsc_renew;
2738                                         NFSUNLOCKMNT(clp->nfsc_nmp);
2739                                         (void)nfsrpc_renew(clp, dsp, cred, p);
2740                                         goto checkdsrenew;
2741                                 }
2742                                 dsp = TAILQ_NEXT(dsp, nfsclds_list);
2743                         }
2744                         NFSUNLOCKMNT(clp->nfsc_nmp);
2745                 }
2746
2747                 TAILQ_INIT(&dh);
2748                 NFSLOCKCLSTATE();
2749                 if (cbpathdown)
2750                         /* It's a Total Recall! */
2751                         nfscl_totalrecall(clp);
2752
2753                 /*
2754                  * Now, handle defunct owners.
2755                  */
2756                 LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
2757                         if (LIST_EMPTY(&owp->nfsow_open)) {
2758                                 if (owp->nfsow_defunct != 0)
2759                                         nfscl_freeopenowner(owp, 0);
2760                         }
2761                 }
2762
2763                 /*
2764                  * Do the recall on any delegations. To avoid trouble, always
2765                  * come back up here after having slept.
2766                  */
2767                 igotlock = 0;
2768 tryagain:
2769                 dp = TAILQ_FIRST(&clp->nfsc_deleg);
2770                 while (dp != NULL) {
2771                         ndp = TAILQ_NEXT(dp, nfsdl_list);
2772                         if ((dp->nfsdl_flags & NFSCLDL_RECALL)) {
2773                                 /*
2774                                  * Wait for outstanding I/O ops to be done.
2775                                  */
2776                                 if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
2777                                     if (igotlock) {
2778                                         nfsv4_unlock(&clp->nfsc_lock, 0);
2779                                         igotlock = 0;
2780                                     }
2781                                     dp->nfsdl_rwlock.nfslock_lock |=
2782                                         NFSV4LOCK_WANTED;
2783                                     msleep(&dp->nfsdl_rwlock,
2784                                         NFSCLSTATEMUTEXPTR, PVFS, "nfscld",
2785                                         5 * hz);
2786                                     if (NFSCL_FORCEDISM(mp))
2787                                         goto terminate;
2788                                     goto tryagain;
2789                                 }
2790                                 while (!igotlock) {
2791                                     igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
2792                                         &islept, NFSCLSTATEMUTEXPTR, mp);
2793                                     if (igotlock == 0 && NFSCL_FORCEDISM(mp))
2794                                         goto terminate;
2795                                     if (islept)
2796                                         goto tryagain;
2797                                 }
2798                                 NFSUNLOCKCLSTATE();
2799                                 newnfs_copycred(&dp->nfsdl_cred, cred);
2800                                 ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp,
2801                                     NULL, cred, p, 1, &vp);
2802                                 if (!ret) {
2803                                     nfscl_cleandeleg(dp);
2804                                     TAILQ_REMOVE(&clp->nfsc_deleg, dp,
2805                                         nfsdl_list);
2806                                     LIST_REMOVE(dp, nfsdl_hash);
2807                                     TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2808                                     nfscl_delegcnt--;
2809                                     nfsstatsv1.cldelegates--;
2810                                 }
2811                                 NFSLOCKCLSTATE();
2812                                 /*
2813                                  * The nfsc_lock must be released before doing
2814                                  * vrele(), since it might call nfs_inactive().
2815                                  * For the unlikely case where the vnode failed
2816                                  * to be acquired by nfscl_recalldeleg(), a
2817                                  * VOP_RECLAIM() should be in progress and it
2818                                  * will return the delegation.
2819                                  */
2820                                 nfsv4_unlock(&clp->nfsc_lock, 0);
2821                                 igotlock = 0;
2822                                 if (vp != NULL) {
2823                                         NFSUNLOCKCLSTATE();
2824                                         vrele(vp);
2825                                         NFSLOCKCLSTATE();
2826                                 }
2827                                 goto tryagain;
2828                         }
2829                         dp = ndp;
2830                 }
2831
2832                 /*
2833                  * Clear out old delegations, if we are above the high water
2834                  * mark. Only clear out ones with no state related to them.
2835                  * The tailq list is in LRU order.
2836                  */
2837                 dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead);
2838                 while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) {
2839                     ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list);
2840                     if (dp->nfsdl_rwlock.nfslock_usecnt == 0 &&
2841                         dp->nfsdl_rwlock.nfslock_lock == 0 &&
2842                         dp->nfsdl_timestamp < NFSD_MONOSEC &&
2843                         (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_ZAPPED |
2844                           NFSCLDL_NEEDRECLAIM | NFSCLDL_DELEGRET)) == 0) {
2845                         clearok = 1;
2846                         LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
2847                             op = LIST_FIRST(&owp->nfsow_open);
2848                             if (op != NULL) {
2849                                 clearok = 0;
2850                                 break;
2851                             }
2852                         }
2853                         if (clearok) {
2854                             LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
2855                                 if (!LIST_EMPTY(&lp->nfsl_lock)) {
2856                                     clearok = 0;
2857                                     break;
2858                                 }
2859                             }
2860                         }
2861                         if (clearok) {
2862                             TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
2863                             LIST_REMOVE(dp, nfsdl_hash);
2864                             TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2865                             nfscl_delegcnt--;
2866                             nfsstatsv1.cldelegates--;
2867                         }
2868                     }
2869                     dp = ndp;
2870                 }
2871                 if (igotlock)
2872                         nfsv4_unlock(&clp->nfsc_lock, 0);
2873
2874                 /*
2875                  * Do the recall on any layouts. To avoid trouble, always
2876                  * come back up here after having slept.
2877                  */
2878                 TAILQ_INIT(&rlh);
2879 tryagain2:
2880                 TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) {
2881                         if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) {
2882                                 /*
2883                                  * Wait for outstanding I/O ops to be done.
2884                                  */
2885                                 if (lyp->nfsly_lock.nfslock_usecnt > 0 ||
2886                                     (lyp->nfsly_lock.nfslock_lock &
2887                                      NFSV4LOCK_LOCK) != 0) {
2888                                         lyp->nfsly_lock.nfslock_lock |=
2889                                             NFSV4LOCK_WANTED;
2890                                         msleep(&lyp->nfsly_lock.nfslock_lock,
2891                                             NFSCLSTATEMUTEXPTR, PVFS, "nfslyp",
2892                                             5 * hz);
2893                                         if (NFSCL_FORCEDISM(mp))
2894                                             goto terminate;
2895                                         goto tryagain2;
2896                                 }
2897                                 /* Move the layout to the recall list. */
2898                                 TAILQ_REMOVE(&clp->nfsc_layout, lyp,
2899                                     nfsly_list);
2900                                 LIST_REMOVE(lyp, nfsly_hash);
2901                                 TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list);
2902
2903                                 /* Handle any layout commits. */
2904                                 if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) &&
2905                                     (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
2906                                         lyp->nfsly_flags &= ~NFSLY_WRITTEN;
2907                                         NFSUNLOCKCLSTATE();
2908                                         NFSCL_DEBUG(3, "do layoutcommit\n");
2909                                         nfscl_dolayoutcommit(clp->nfsc_nmp, lyp,
2910                                             cred, p);
2911                                         NFSLOCKCLSTATE();
2912                                         goto tryagain2;
2913                                 }
2914                         }
2915                 }
2916
2917                 /* Now, look for stale layouts. */
2918                 lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead);
2919                 while (lyp != NULL) {
2920                         nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list);
2921                         if (lyp->nfsly_timestamp < NFSD_MONOSEC &&
2922                             (lyp->nfsly_flags & NFSLY_RECALL) == 0 &&
2923                             lyp->nfsly_lock.nfslock_usecnt == 0 &&
2924                             lyp->nfsly_lock.nfslock_lock == 0) {
2925                                 NFSCL_DEBUG(4, "ret stale lay=%d\n",
2926                                     nfscl_layoutcnt);
2927                                 recallp = malloc(sizeof(*recallp),
2928                                     M_NFSLAYRECALL, M_NOWAIT);
2929                                 if (recallp == NULL)
2930                                         break;
2931                                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE,
2932                                     lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX,
2933                                     lyp->nfsly_stateid.seqid, 0, 0, NULL,
2934                                     recallp);
2935                         }
2936                         lyp = nlyp;
2937                 }
2938
2939                 /*
2940                  * Free up any unreferenced device info structures.
2941                  */
2942                 LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) {
2943                         if (dip->nfsdi_layoutrefs == 0 &&
2944                             dip->nfsdi_refcnt == 0) {
2945                                 NFSCL_DEBUG(4, "freeing devinfo\n");
2946                                 LIST_REMOVE(dip, nfsdi_list);
2947                                 nfscl_freedevinfo(dip);
2948                         }
2949                 }
2950                 NFSUNLOCKCLSTATE();
2951
2952                 /* Do layout return(s), as required. */
2953                 TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) {
2954                         TAILQ_REMOVE(&rlh, lyp, nfsly_list);
2955                         NFSCL_DEBUG(4, "ret layout\n");
2956                         nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p);
2957                         nfscl_freelayout(lyp);
2958                 }
2959
2960                 /*
2961                  * Delegreturn any delegations cleaned out or recalled.
2962                  */
2963                 TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
2964                         newnfs_copycred(&dp->nfsdl_cred, cred);
2965                         (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
2966                         TAILQ_REMOVE(&dh, dp, nfsdl_list);
2967                         free(dp, M_NFSCLDELEG);
2968                 }
2969
2970                 SLIST_INIT(&lfh);
2971                 /*
2972                  * Call nfscl_cleanupkext() once per second to check for
2973                  * open/lock owners where the process has exited.
2974                  */
2975                 mytime = NFSD_MONOSEC;
2976                 if (prevsec != mytime) {
2977                         prevsec = mytime;
2978                         nfscl_cleanupkext(clp, &lfh);
2979                 }
2980
2981                 /*
2982                  * Do a ReleaseLockOwner for all lock owners where the
2983                  * associated process no longer exists, as found by
2984                  * nfscl_cleanupkext().
2985                  */
2986                 newnfs_setroot(cred);
2987                 SLIST_FOREACH_SAFE(lfhp, &lfh, nfslfh_list, nlfhp) {
2988                         LIST_FOREACH_SAFE(lp, &lfhp->nfslfh_lock, nfsl_list,
2989                             nlp) {
2990                                 (void)nfsrpc_rellockown(clp->nfsc_nmp, lp,
2991                                     lfhp->nfslfh_fh, lfhp->nfslfh_len, cred,
2992                                     p);
2993                                 nfscl_freelockowner(lp, 0);
2994                         }
2995                         free(lfhp, M_TEMP);
2996                 }
2997                 SLIST_INIT(&lfh);
2998
2999                 NFSLOCKCLSTATE();
3000                 if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0)
3001                         (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl",
3002                             hz);
3003 terminate:
3004                 if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) {
3005                         clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD;
3006                         NFSUNLOCKCLSTATE();
3007                         NFSFREECRED(cred);
3008                         wakeup((caddr_t)clp);
3009                         return;
3010                 }
3011                 NFSUNLOCKCLSTATE();
3012         }
3013 }
3014
3015 /*
3016  * Initiate state recovery. Called when NFSERR_STALECLIENTID,
3017  * NFSERR_STALESTATEID or NFSERR_BADSESSION is received.
3018  */
3019 void
3020 nfscl_initiate_recovery(struct nfsclclient *clp)
3021 {
3022
3023         if (clp == NULL)
3024                 return;
3025         NFSLOCKCLSTATE();
3026         clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
3027         NFSUNLOCKCLSTATE();
3028         wakeup((caddr_t)clp);
3029 }
3030
3031 /*
3032  * Dump out the state stuff for debugging.
3033  */
3034 void
3035 nfscl_dumpstate(struct nfsmount *nmp, int openowner, int opens,
3036     int lockowner, int locks)
3037 {
3038         struct nfsclclient *clp;
3039         struct nfsclowner *owp;
3040         struct nfsclopen *op;
3041         struct nfscllockowner *lp;
3042         struct nfscllock *lop;
3043         struct nfscldeleg *dp;
3044
3045         clp = nmp->nm_clp;
3046         if (clp == NULL) {
3047                 printf("nfscl dumpstate NULL clp\n");
3048                 return;
3049         }
3050         NFSLOCKCLSTATE();
3051         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
3052           LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3053             if (openowner && !LIST_EMPTY(&owp->nfsow_open))
3054                 printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
3055                     owp->nfsow_owner[0], owp->nfsow_owner[1],
3056                     owp->nfsow_owner[2], owp->nfsow_owner[3],
3057                     owp->nfsow_seqid);
3058             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3059                 if (opens)
3060                     printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
3061                         op->nfso_stateid.other[0], op->nfso_stateid.other[1],
3062                         op->nfso_stateid.other[2], op->nfso_opencnt,
3063                         op->nfso_fh[12]);
3064                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
3065                     if (lockowner)
3066                         printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
3067                             lp->nfsl_owner[0], lp->nfsl_owner[1],
3068                             lp->nfsl_owner[2], lp->nfsl_owner[3],
3069                             lp->nfsl_seqid,
3070                             lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
3071                             lp->nfsl_stateid.other[2]);
3072                     LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3073                         if (locks)
3074 #ifdef __FreeBSD__
3075                             printf("lck typ=%d fst=%ju end=%ju\n",
3076                                 lop->nfslo_type, (intmax_t)lop->nfslo_first,
3077                                 (intmax_t)lop->nfslo_end);
3078 #else
3079                             printf("lck typ=%d fst=%qd end=%qd\n",
3080                                 lop->nfslo_type, lop->nfslo_first,
3081                                 lop->nfslo_end);
3082 #endif
3083                     }
3084                 }
3085             }
3086           }
3087         }
3088         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3089             if (openowner && !LIST_EMPTY(&owp->nfsow_open))
3090                 printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
3091                     owp->nfsow_owner[0], owp->nfsow_owner[1],
3092                     owp->nfsow_owner[2], owp->nfsow_owner[3],
3093                     owp->nfsow_seqid);
3094             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3095                 if (opens)
3096                     printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
3097                         op->nfso_stateid.other[0], op->nfso_stateid.other[1],
3098                         op->nfso_stateid.other[2], op->nfso_opencnt,
3099                         op->nfso_fh[12]);
3100                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
3101                     if (lockowner)
3102                         printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
3103                             lp->nfsl_owner[0], lp->nfsl_owner[1],
3104                             lp->nfsl_owner[2], lp->nfsl_owner[3],
3105                             lp->nfsl_seqid,
3106                             lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
3107                             lp->nfsl_stateid.other[2]);
3108                     LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3109                         if (locks)
3110 #ifdef __FreeBSD__
3111                             printf("lck typ=%d fst=%ju end=%ju\n",
3112                                 lop->nfslo_type, (intmax_t)lop->nfslo_first,
3113                                 (intmax_t)lop->nfslo_end);
3114 #else
3115                             printf("lck typ=%d fst=%qd end=%qd\n",
3116                                 lop->nfslo_type, lop->nfslo_first,
3117                                 lop->nfslo_end);
3118 #endif
3119                     }
3120                 }
3121             }
3122         }
3123         NFSUNLOCKCLSTATE();
3124 }
3125
3126 /*
3127  * Check for duplicate open owners and opens.
3128  * (Only used as a diagnostic aid.)
3129  */
3130 void
3131 nfscl_dupopen(vnode_t vp, int dupopens)
3132 {
3133         struct nfsclclient *clp;
3134         struct nfsclowner *owp, *owp2;
3135         struct nfsclopen *op, *op2;
3136         struct nfsfh *nfhp;
3137
3138         clp = VFSTONFS(vp->v_mount)->nm_clp;
3139         if (clp == NULL) {
3140                 printf("nfscl dupopen NULL clp\n");
3141                 return;
3142         }
3143         nfhp = VTONFS(vp)->n_fhp;
3144         NFSLOCKCLSTATE();
3145
3146         /*
3147          * First, search for duplicate owners.
3148          * These should never happen!
3149          */
3150         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3151             LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3152                 if (owp != owp2 &&
3153                     !NFSBCMP(owp->nfsow_owner, owp2->nfsow_owner,
3154                     NFSV4CL_LOCKNAMELEN)) {
3155                         NFSUNLOCKCLSTATE();
3156                         printf("DUP OWNER\n");
3157                         nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0, 0);
3158                         return;
3159                 }
3160             }
3161         }
3162
3163         /*
3164          * Now, search for duplicate stateids.
3165          * These shouldn't happen, either.
3166          */
3167         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3168             LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3169                 LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3170                     LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3171                         if (op != op2 &&
3172                             (op->nfso_stateid.other[0] != 0 ||
3173                              op->nfso_stateid.other[1] != 0 ||
3174                              op->nfso_stateid.other[2] != 0) &&
3175                             op->nfso_stateid.other[0] == op2->nfso_stateid.other[0] &&
3176                             op->nfso_stateid.other[1] == op2->nfso_stateid.other[1] &&
3177                             op->nfso_stateid.other[2] == op2->nfso_stateid.other[2]) {
3178                             NFSUNLOCKCLSTATE();
3179                             printf("DUP STATEID\n");
3180                             nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0, 0);
3181                             return;
3182                         }
3183                     }
3184                 }
3185             }
3186         }
3187
3188         /*
3189          * Now search for duplicate opens.
3190          * Duplicate opens for the same owner
3191          * should never occur. Other duplicates are
3192          * possible and are checked for if "dupopens"
3193          * is true.
3194          */
3195         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3196             LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3197                 if (nfhp->nfh_len == op2->nfso_fhlen &&
3198                     !NFSBCMP(nfhp->nfh_fh, op2->nfso_fh, nfhp->nfh_len)) {
3199                     LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3200                         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3201                             if (op != op2 && nfhp->nfh_len == op->nfso_fhlen &&
3202                                 !NFSBCMP(nfhp->nfh_fh, op->nfso_fh, nfhp->nfh_len) &&
3203                                 (!NFSBCMP(op->nfso_own->nfsow_owner,
3204                                  op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN) ||
3205                                  dupopens)) {
3206                                 if (!NFSBCMP(op->nfso_own->nfsow_owner,
3207                                     op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
3208                                     NFSUNLOCKCLSTATE();
3209                                     printf("BADDUP OPEN\n");
3210                                 } else {
3211                                     NFSUNLOCKCLSTATE();
3212                                     printf("DUP OPEN\n");
3213                                 }
3214                                 nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0,
3215                                     0);
3216                                 return;
3217                             }
3218                         }
3219                     }
3220                 }
3221             }
3222         }
3223         NFSUNLOCKCLSTATE();
3224 }
3225
3226 /*
3227  * During close, find an open that needs to be dereferenced and
3228  * dereference it. If there are no more opens for this file,
3229  * log a message to that effect.
3230  * Opens aren't actually Close'd until VOP_INACTIVE() is performed
3231  * on the file's vnode.
3232  * This is the safe way, since it is difficult to identify
3233  * which open the close is for and I/O can be performed after the
3234  * close(2) system call when a file is mmap'd.
3235  * If it returns 0 for success, there will be a referenced
3236  * clp returned via clpp.
3237  */
3238 int
3239 nfscl_getclose(vnode_t vp, struct nfsclclient **clpp)
3240 {
3241         struct nfsclclient *clp;
3242         struct nfsclowner *owp;
3243         struct nfsclopen *op;
3244         struct nfscldeleg *dp;
3245         struct nfsfh *nfhp;
3246         int error, notdecr;
3247
3248         error = nfscl_getcl(vp->v_mount, NULL, NULL, false, &clp);
3249         if (error)
3250                 return (error);
3251         *clpp = clp;
3252
3253         nfhp = VTONFS(vp)->n_fhp;
3254         notdecr = 1;
3255         NFSLOCKCLSTATE();
3256         /*
3257          * First, look for one under a delegation that was locally issued
3258          * and just decrement the opencnt for it. Since all my Opens against
3259          * the server are DENY_NONE, I don't see a problem with hanging
3260          * onto them. (It is much easier to use one of the extant Opens
3261          * that I already have on the server when a Delegation is recalled
3262          * than to do fresh Opens.) Someday, I might need to rethink this, but.
3263          */
3264         dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3265         if (dp != NULL) {
3266                 LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3267                         op = LIST_FIRST(&owp->nfsow_open);
3268                         if (op != NULL) {
3269                                 /*
3270                                  * Since a delegation is for a file, there
3271                                  * should never be more than one open for
3272                                  * each openowner.
3273                                  */
3274                                 if (LIST_NEXT(op, nfso_list) != NULL)
3275                                         panic("nfscdeleg opens");
3276                                 if (notdecr && op->nfso_opencnt > 0) {
3277                                         notdecr = 0;
3278                                         op->nfso_opencnt--;
3279                                         break;
3280                                 }
3281                         }
3282                 }
3283         }
3284
3285         /* Now process the opens against the server. */
3286         LIST_FOREACH(op, NFSCLOPENHASH(clp, nfhp->nfh_fh, nfhp->nfh_len),
3287             nfso_hash) {
3288                 if (op->nfso_fhlen == nfhp->nfh_len &&
3289                     !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3290                     nfhp->nfh_len)) {
3291                         /* Found an open, decrement cnt if possible */
3292                         if (notdecr && op->nfso_opencnt > 0) {
3293                                 notdecr = 0;
3294                                 op->nfso_opencnt--;
3295                         }
3296                         /*
3297                          * There are more opens, so just return.
3298                          */
3299                         if (op->nfso_opencnt > 0) {
3300                                 NFSUNLOCKCLSTATE();
3301                                 return (0);
3302                         }
3303                 }
3304         }
3305         NFSUNLOCKCLSTATE();
3306         if (notdecr)
3307                 printf("nfscl: never fnd open\n");
3308         return (0);
3309 }
3310
3311 int
3312 nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p)
3313 {
3314         struct nfsclclient *clp;
3315         struct nfsclowner *owp, *nowp;
3316         struct nfsclopen *op;
3317         struct nfscldeleg *dp;
3318         struct nfsfh *nfhp;
3319         struct nfsclrecalllayout *recallp;
3320         int error;
3321
3322         error = nfscl_getcl(vp->v_mount, NULL, NULL, false, &clp);
3323         if (error)
3324                 return (error);
3325         *clpp = clp;
3326
3327         nfhp = VTONFS(vp)->n_fhp;
3328         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
3329         NFSLOCKCLSTATE();
3330         /*
3331          * First get rid of the local Open structures, which should be no
3332          * longer in use.
3333          */
3334         dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3335         if (dp != NULL) {
3336                 LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
3337                         op = LIST_FIRST(&owp->nfsow_open);
3338                         if (op != NULL) {
3339                                 KASSERT((op->nfso_opencnt == 0),
3340                                     ("nfscl: bad open cnt on deleg"));
3341                                 nfscl_freeopen(op, 1);
3342                         }
3343                         nfscl_freeopenowner(owp, 1);
3344                 }
3345         }
3346
3347         /* Return any layouts marked return on close. */
3348         nfscl_retoncloselayout(vp, clp, nfhp->nfh_fh, nfhp->nfh_len, &recallp);
3349
3350         /* Now process the opens against the server. */
3351 lookformore:
3352         LIST_FOREACH(op, NFSCLOPENHASH(clp, nfhp->nfh_fh, nfhp->nfh_len),
3353             nfso_hash) {
3354                 if (op->nfso_fhlen == nfhp->nfh_len &&
3355                     !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3356                     nfhp->nfh_len)) {
3357                         /* Found an open, close it. */
3358 #ifdef DIAGNOSTIC
3359                         KASSERT((op->nfso_opencnt == 0),
3360                             ("nfscl: bad open cnt on server (%d)",
3361                              op->nfso_opencnt));
3362 #endif
3363                         NFSUNLOCKCLSTATE();
3364                         nfsrpc_doclose(VFSTONFS(vp->v_mount), op, p);
3365                         NFSLOCKCLSTATE();
3366                         goto lookformore;
3367                 }
3368         }
3369         NFSUNLOCKCLSTATE();
3370         /*
3371          * recallp has been set NULL by nfscl_retoncloselayout() if it was
3372          * used by the function, but calling free() with a NULL pointer is ok.
3373          */
3374         free(recallp, M_NFSLAYRECALL);
3375         return (0);
3376 }
3377
3378 /*
3379  * Return all delegations on this client.
3380  * (Must be called with client sleep lock.)
3381  */
3382 static void
3383 nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p)
3384 {
3385         struct nfscldeleg *dp, *ndp;
3386         struct ucred *cred;
3387
3388         cred = newnfs_getcred();
3389         TAILQ_FOREACH_SAFE(dp, &clp->nfsc_deleg, nfsdl_list, ndp) {
3390                 nfscl_cleandeleg(dp);
3391                 (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3392                 nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
3393         }
3394         NFSFREECRED(cred);
3395 }
3396
3397 /*
3398  * Return any delegation for this vp.
3399  */
3400 void
3401 nfscl_delegreturnvp(vnode_t vp, NFSPROC_T *p)
3402 {
3403         struct nfsclclient *clp;
3404         struct nfscldeleg *dp;
3405         struct ucred *cred;
3406         struct nfsnode *np;
3407         struct nfsmount *nmp;
3408
3409         nmp = VFSTONFS(vp->v_mount);
3410         NFSLOCKMNT(nmp);
3411         if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
3412                 NFSUNLOCKMNT(nmp);
3413                 return;
3414         }
3415         NFSUNLOCKMNT(nmp);
3416         np = VTONFS(vp);
3417         cred = newnfs_getcred();
3418         dp = NULL;
3419         NFSLOCKCLSTATE();
3420         clp = nmp->nm_clp;
3421         if (clp != NULL)
3422                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
3423                     np->n_fhp->nfh_len);
3424         if (dp != NULL) {
3425                 nfscl_cleandeleg(dp);
3426                 nfscl_freedeleg(&clp->nfsc_deleg, dp, false);
3427                 NFSUNLOCKCLSTATE();
3428                 newnfs_copycred(&dp->nfsdl_cred, cred);
3429                 nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3430                 free(dp, M_NFSCLDELEG);
3431         } else
3432                 NFSUNLOCKCLSTATE();
3433         NFSFREECRED(cred);
3434 }
3435
3436 /*
3437  * Do a callback RPC.
3438  */
3439 void
3440 nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
3441 {
3442         int clist, gotseq_ok, i, j, k, op, rcalls;
3443         u_int32_t *tl;
3444         struct nfsclclient *clp;
3445         struct nfscldeleg *dp = NULL;
3446         int numops, taglen = -1, error = 0, trunc __unused;
3447         u_int32_t minorvers = 0, retops = 0, *retopsp = NULL, *repp, cbident;
3448         u_char tag[NFSV4_SMALLSTR + 1], *tagstr;
3449         vnode_t vp = NULL;
3450         struct nfsnode *np;
3451         struct vattr va;
3452         struct nfsfh *nfhp;
3453         mount_t mp;
3454         nfsattrbit_t attrbits, rattrbits;
3455         nfsv4stateid_t stateid;
3456         uint32_t seqid, slotid = 0, highslot, cachethis __unused;
3457         uint8_t sessionid[NFSX_V4SESSIONID];
3458         struct mbuf *rep;
3459         struct nfscllayout *lyp;
3460         uint64_t filesid[2], len, off;
3461         int changed, gotone, laytype, recalltype;
3462         uint32_t iomode;
3463         struct nfsclrecalllayout *recallp = NULL;
3464         struct nfsclsession *tsep;
3465
3466         gotseq_ok = 0;
3467         nfsrvd_rephead(nd);
3468         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3469         taglen = fxdr_unsigned(int, *tl);
3470         if (taglen < 0) {
3471                 error = EBADRPC;
3472                 goto nfsmout;
3473         }
3474         if (taglen <= NFSV4_SMALLSTR)
3475                 tagstr = tag;
3476         else
3477                 tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK);
3478         error = nfsrv_mtostr(nd, tagstr, taglen);
3479         if (error) {
3480                 if (taglen > NFSV4_SMALLSTR)
3481                         free(tagstr, M_TEMP);
3482                 taglen = -1;
3483                 goto nfsmout;
3484         }
3485         (void) nfsm_strtom(nd, tag, taglen);
3486         if (taglen > NFSV4_SMALLSTR) {
3487                 free(tagstr, M_TEMP);
3488         }
3489         NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED);
3490         NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3491         minorvers = fxdr_unsigned(u_int32_t, *tl++);
3492         if (minorvers != NFSV4_MINORVERSION &&
3493             minorvers != NFSV41_MINORVERSION &&
3494             minorvers != NFSV42_MINORVERSION)
3495                 nd->nd_repstat = NFSERR_MINORVERMISMATCH;
3496         cbident = fxdr_unsigned(u_int32_t, *tl++);
3497         if (nd->nd_repstat)
3498                 numops = 0;
3499         else
3500                 numops = fxdr_unsigned(int, *tl);
3501         /*
3502          * Loop around doing the sub ops.
3503          */
3504         for (i = 0; i < numops; i++) {
3505                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3506                 NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED);
3507                 *repp++ = *tl;
3508                 op = fxdr_unsigned(int, *tl);
3509                 if (op < NFSV4OP_CBGETATTR ||
3510                    (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) ||
3511                    (op > NFSV4OP_CBNOTIFYDEVID &&
3512                     minorvers == NFSV41_MINORVERSION) ||
3513                    (op > NFSV4OP_CBOFFLOAD &&
3514                     minorvers == NFSV42_MINORVERSION)) {
3515                     nd->nd_repstat = NFSERR_OPILLEGAL;
3516                     *repp = nfscl_errmap(nd, minorvers);
3517                     retops++;
3518                     break;
3519                 }
3520                 nd->nd_procnum = op;
3521                 if (op < NFSV42_CBNOPS)
3522                         nfsstatsv1.cbrpccnt[nd->nd_procnum]++;
3523                 switch (op) {
3524                 case NFSV4OP_CBGETATTR:
3525                         NFSCL_DEBUG(4, "cbgetattr\n");
3526                         mp = NULL;
3527                         vp = NULL;
3528                         error = nfsm_getfh(nd, &nfhp);
3529                         if (!error)
3530                                 error = nfsrv_getattrbits(nd, &attrbits,
3531                                     NULL, NULL);
3532                         if (error == 0 && i == 0 &&
3533                             minorvers != NFSV4_MINORVERSION)
3534                                 error = NFSERR_OPNOTINSESS;
3535                         if (!error) {
3536                                 mp = nfscl_getmnt(minorvers, sessionid, cbident,
3537                                     &clp);
3538                                 if (mp == NULL)
3539                                         error = NFSERR_SERVERFAULT;
3540                         }
3541                         if (!error) {
3542                                 error = nfscl_ngetreopen(mp, nfhp->nfh_fh,
3543                                     nfhp->nfh_len, p, &np);
3544                                 if (!error)
3545                                         vp = NFSTOV(np);
3546                         }
3547                         if (!error) {
3548                                 NFSZERO_ATTRBIT(&rattrbits);
3549                                 NFSLOCKCLSTATE();
3550                                 dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3551                                     nfhp->nfh_len);
3552                                 if (dp != NULL) {
3553                                         if (NFSISSET_ATTRBIT(&attrbits,
3554                                             NFSATTRBIT_SIZE)) {
3555                                                 if (vp != NULL)
3556                                                         va.va_size = np->n_size;
3557                                                 else
3558                                                         va.va_size =
3559                                                             dp->nfsdl_size;
3560                                                 NFSSETBIT_ATTRBIT(&rattrbits,
3561                                                     NFSATTRBIT_SIZE);
3562                                         }
3563                                         if (NFSISSET_ATTRBIT(&attrbits,
3564                                             NFSATTRBIT_CHANGE)) {
3565                                                 va.va_filerev =
3566                                                     dp->nfsdl_change;
3567                                                 if (vp == NULL ||
3568                                                     (np->n_flag & NDELEGMOD))
3569                                                         va.va_filerev++;
3570                                                 NFSSETBIT_ATTRBIT(&rattrbits,
3571                                                     NFSATTRBIT_CHANGE);
3572                                         }
3573                                 } else
3574                                         error = NFSERR_SERVERFAULT;
3575                                 NFSUNLOCKCLSTATE();
3576                         }
3577                         if (vp != NULL)
3578                                 vrele(vp);
3579                         if (mp != NULL)
3580                                 vfs_unbusy(mp);
3581                         if (nfhp != NULL)
3582                                 free(nfhp, M_NFSFH);
3583                         if (!error)
3584                                 (void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va,
3585                                     NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0,
3586                                     (uint64_t)0, NULL);
3587                         break;
3588                 case NFSV4OP_CBRECALL:
3589                         NFSCL_DEBUG(4, "cbrecall\n");
3590                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
3591                             NFSX_UNSIGNED);
3592                         stateid.seqid = *tl++;
3593                         NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other,
3594                             NFSX_STATEIDOTHER);
3595                         tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
3596                         trunc = fxdr_unsigned(int, *tl);
3597                         error = nfsm_getfh(nd, &nfhp);
3598                         if (error == 0 && i == 0 &&
3599                             minorvers != NFSV4_MINORVERSION)
3600                                 error = NFSERR_OPNOTINSESS;
3601                         if (!error) {
3602                                 NFSLOCKCLSTATE();
3603                                 if (minorvers == NFSV4_MINORVERSION)
3604                                         clp = nfscl_getclnt(cbident);
3605                                 else
3606                                         clp = nfscl_getclntsess(sessionid);
3607                                 if (clp != NULL) {
3608                                         dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3609                                             nfhp->nfh_len);
3610                                         if (dp != NULL && (dp->nfsdl_flags &
3611                                             NFSCLDL_DELEGRET) == 0) {
3612                                                 dp->nfsdl_flags |=
3613                                                     NFSCLDL_RECALL;
3614                                                 wakeup((caddr_t)clp);
3615                                         }
3616                                 } else {
3617                                         error = NFSERR_SERVERFAULT;
3618                                 }
3619                                 NFSUNLOCKCLSTATE();
3620                         }
3621                         if (nfhp != NULL)
3622                                 free(nfhp, M_NFSFH);
3623                         break;
3624                 case NFSV4OP_CBLAYOUTRECALL:
3625                         NFSCL_DEBUG(4, "cblayrec\n");
3626                         nfhp = NULL;
3627                         NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
3628                         laytype = fxdr_unsigned(int, *tl++);
3629                         iomode = fxdr_unsigned(uint32_t, *tl++);
3630                         if (newnfs_true == *tl++)
3631                                 changed = 1;
3632                         else
3633                                 changed = 0;
3634                         recalltype = fxdr_unsigned(int, *tl);
3635                         NFSCL_DEBUG(4, "layt=%d iom=%d ch=%d rectyp=%d\n",
3636                             laytype, iomode, changed, recalltype);
3637                         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL,
3638                             M_WAITOK);
3639                         if (laytype != NFSLAYOUT_NFSV4_1_FILES &&
3640                             laytype != NFSLAYOUT_FLEXFILE)
3641                                 error = NFSERR_NOMATCHLAYOUT;
3642                         else if (recalltype == NFSLAYOUTRETURN_FILE) {
3643                                 error = nfsm_getfh(nd, &nfhp);
3644                                 NFSCL_DEBUG(4, "retfile getfh=%d\n", error);
3645                                 if (error != 0)
3646                                         goto nfsmout;
3647                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER +
3648                                     NFSX_STATEID);
3649                                 off = fxdr_hyper(tl); tl += 2;
3650                                 len = fxdr_hyper(tl); tl += 2;
3651                                 stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
3652                                 NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
3653                                 if (minorvers == NFSV4_MINORVERSION)
3654                                         error = NFSERR_NOTSUPP;
3655                                 else if (i == 0)
3656                                         error = NFSERR_OPNOTINSESS;
3657                                 NFSCL_DEBUG(4, "off=%ju len=%ju sq=%u err=%d\n",
3658                                     (uintmax_t)off, (uintmax_t)len,
3659                                     stateid.seqid, error);
3660                                 if (error == 0) {
3661                                         NFSLOCKCLSTATE();
3662                                         clp = nfscl_getclntsess(sessionid);
3663                                         NFSCL_DEBUG(4, "cbly clp=%p\n", clp);
3664                                         if (clp != NULL) {
3665                                                 lyp = nfscl_findlayout(clp,
3666                                                     nfhp->nfh_fh,
3667                                                     nfhp->nfh_len);
3668                                                 NFSCL_DEBUG(4, "cblyp=%p\n",
3669                                                     lyp);
3670                                                 if (lyp != NULL &&
3671                                                     (lyp->nfsly_flags &
3672                                                      (NFSLY_FILES |
3673                                                       NFSLY_FLEXFILE)) != 0 &&
3674                                                     !NFSBCMP(stateid.other,
3675                                                     lyp->nfsly_stateid.other,
3676                                                     NFSX_STATEIDOTHER)) {
3677                                                         error =
3678                                                             nfscl_layoutrecall(
3679                                                             recalltype,
3680                                                             lyp, iomode, off,
3681                                                             len, stateid.seqid,
3682                                                             0, 0, NULL,
3683                                                             recallp);
3684                                                         if (error == 0 &&
3685                                                             stateid.seqid >
3686                                                             lyp->nfsly_stateid.seqid)
3687                                                                 lyp->nfsly_stateid.seqid =
3688                                                                     stateid.seqid;
3689                                                         recallp = NULL;
3690                                                         wakeup(clp);
3691                                                         NFSCL_DEBUG(4,
3692                                                             "aft layrcal=%d "
3693                                                             "layseqid=%d\n",
3694                                                             error,
3695                                                             lyp->nfsly_stateid.seqid);
3696                                                 } else
3697                                                         error =
3698                                                           NFSERR_NOMATCHLAYOUT;
3699                                         } else
3700                                                 error = NFSERR_NOMATCHLAYOUT;
3701                                         NFSUNLOCKCLSTATE();
3702                                 }
3703                                 free(nfhp, M_NFSFH);
3704                         } else if (recalltype == NFSLAYOUTRETURN_FSID) {
3705                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER);
3706                                 filesid[0] = fxdr_hyper(tl); tl += 2;
3707                                 filesid[1] = fxdr_hyper(tl); tl += 2;
3708                                 gotone = 0;
3709                                 NFSLOCKCLSTATE();
3710                                 clp = nfscl_getclntsess(sessionid);
3711                                 if (clp != NULL) {
3712                                         TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3713                                             nfsly_list) {
3714                                                 if (lyp->nfsly_filesid[0] ==
3715                                                     filesid[0] &&
3716                                                     lyp->nfsly_filesid[1] ==
3717                                                     filesid[1]) {
3718                                                         error =
3719                                                             nfscl_layoutrecall(
3720                                                             recalltype,
3721                                                             lyp, iomode, 0,
3722                                                             UINT64_MAX,
3723                                                             lyp->nfsly_stateid.seqid,
3724                                                             0, 0, NULL,
3725                                                             recallp);
3726                                                         recallp = NULL;
3727                                                         gotone = 1;
3728                                                 }
3729                                         }
3730                                         if (gotone != 0)
3731                                                 wakeup(clp);
3732                                         else
3733                                                 error = NFSERR_NOMATCHLAYOUT;
3734                                 } else
3735                                         error = NFSERR_NOMATCHLAYOUT;
3736                                 NFSUNLOCKCLSTATE();
3737                         } else if (recalltype == NFSLAYOUTRETURN_ALL) {
3738                                 gotone = 0;
3739                                 NFSLOCKCLSTATE();
3740                                 clp = nfscl_getclntsess(sessionid);
3741                                 if (clp != NULL) {
3742                                         TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3743                                             nfsly_list) {
3744                                                 error = nfscl_layoutrecall(
3745                                                     recalltype, lyp, iomode, 0,
3746                                                     UINT64_MAX,
3747                                                     lyp->nfsly_stateid.seqid,
3748                                                     0, 0, NULL, recallp);
3749                                                 recallp = NULL;
3750                                                 gotone = 1;
3751                                         }
3752                                         if (gotone != 0)
3753                                                 wakeup(clp);
3754                                         else
3755                                                 error = NFSERR_NOMATCHLAYOUT;
3756                                 } else
3757                                         error = NFSERR_NOMATCHLAYOUT;
3758                                 NFSUNLOCKCLSTATE();
3759                         } else
3760                                 error = NFSERR_NOMATCHLAYOUT;
3761                         if (recallp != NULL) {
3762                                 free(recallp, M_NFSLAYRECALL);
3763                                 recallp = NULL;
3764                         }
3765                         break;
3766                 case NFSV4OP_CBSEQUENCE:
3767                         NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3768                             5 * NFSX_UNSIGNED);
3769                         bcopy(tl, sessionid, NFSX_V4SESSIONID);
3770                         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3771                         seqid = fxdr_unsigned(uint32_t, *tl++);
3772                         slotid = fxdr_unsigned(uint32_t, *tl++);
3773                         highslot = fxdr_unsigned(uint32_t, *tl++);
3774                         cachethis = *tl++;
3775                         /* Throw away the referring call stuff. */
3776                         clist = fxdr_unsigned(int, *tl);
3777                         for (j = 0; j < clist; j++) {
3778                                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3779                                     NFSX_UNSIGNED);
3780                                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3781                                 rcalls = fxdr_unsigned(int, *tl);
3782                                 for (k = 0; k < rcalls; k++) {
3783                                         NFSM_DISSECT(tl, uint32_t *,
3784                                             2 * NFSX_UNSIGNED);
3785                                 }
3786                         }
3787                         NFSLOCKCLSTATE();
3788                         if (i == 0) {
3789                                 clp = nfscl_getclntsess(sessionid);
3790                                 if (clp == NULL)
3791                                         error = NFSERR_SERVERFAULT;
3792                         } else
3793                                 error = NFSERR_SEQUENCEPOS;
3794                         if (error == 0) {
3795                                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3796                                 error = nfsv4_seqsession(seqid, slotid,
3797                                     highslot, tsep->nfsess_cbslots, &rep,
3798                                     tsep->nfsess_backslots);
3799                         }
3800                         NFSUNLOCKCLSTATE();
3801                         if (error == 0 || error == NFSERR_REPLYFROMCACHE) {
3802                                 gotseq_ok = 1;
3803                                 if (rep != NULL) {
3804                                         /*
3805                                          * Handle a reply for a retried
3806                                          * callback.  The reply will be
3807                                          * re-inserted in the session cache
3808                                          * by the nfsv4_seqsess_cacherep() call
3809                                          * after out:
3810                                          */
3811                                         KASSERT(error == NFSERR_REPLYFROMCACHE,
3812                                             ("cbsequence: non-NULL rep"));
3813                                         NFSCL_DEBUG(4, "Got cbretry\n");
3814                                         m_freem(nd->nd_mreq);
3815                                         nd->nd_mreq = rep;
3816                                         rep = NULL;
3817                                         goto out;
3818                                 }
3819                                 NFSM_BUILD(tl, uint32_t *,
3820                                     NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED);
3821                                 bcopy(sessionid, tl, NFSX_V4SESSIONID);
3822                                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3823                                 *tl++ = txdr_unsigned(seqid);
3824                                 *tl++ = txdr_unsigned(slotid);
3825                                 *tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1);
3826                                 *tl = txdr_unsigned(NFSV4_CBSLOTS - 1);
3827                         }
3828                         break;
3829                 default:
3830                         if (i == 0 && minorvers != NFSV4_MINORVERSION)
3831                                 error = NFSERR_OPNOTINSESS;
3832                         else {
3833                                 NFSCL_DEBUG(1, "unsupp callback %d\n", op);
3834                                 error = NFSERR_NOTSUPP;
3835                         }
3836                         break;
3837                 }
3838                 if (error) {
3839                         if (error == EBADRPC || error == NFSERR_BADXDR) {
3840                                 nd->nd_repstat = NFSERR_BADXDR;
3841                         } else {
3842                                 nd->nd_repstat = error;
3843                         }
3844                         error = 0;
3845                 }
3846                 retops++;
3847                 if (nd->nd_repstat) {
3848                         *repp = nfscl_errmap(nd, minorvers);
3849                         break;
3850                 } else
3851                         *repp = 0;      /* NFS4_OK */
3852         }
3853 nfsmout:
3854         if (recallp != NULL)
3855                 free(recallp, M_NFSLAYRECALL);
3856         if (error) {
3857                 if (error == EBADRPC || error == NFSERR_BADXDR)
3858                         nd->nd_repstat = NFSERR_BADXDR;
3859                 else
3860                         printf("nfsv4 comperr1=%d\n", error);
3861         }
3862         if (taglen == -1) {
3863                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3864                 *tl++ = 0;
3865                 *tl = 0;
3866         } else {
3867                 *retopsp = txdr_unsigned(retops);
3868         }
3869         *nd->nd_errp = nfscl_errmap(nd, minorvers);
3870 out:
3871         if (gotseq_ok != 0) {
3872                 rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
3873                 NFSLOCKCLSTATE();
3874                 clp = nfscl_getclntsess(sessionid);
3875                 if (clp != NULL) {
3876                         tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3877                         nfsv4_seqsess_cacherep(slotid, tsep->nfsess_cbslots,
3878                             NFSERR_OK, &rep);
3879                         NFSUNLOCKCLSTATE();
3880                 } else {
3881                         NFSUNLOCKCLSTATE();
3882                         m_freem(rep);
3883                 }
3884         }
3885 }
3886
3887 /*
3888  * Generate the next cbident value. Basically just increment a static value
3889  * and then check that it isn't already in the list, if it has wrapped around.
3890  */
3891 static u_int32_t
3892 nfscl_nextcbident(void)
3893 {
3894         struct nfsclclient *clp;
3895         int matched;
3896         static u_int32_t nextcbident = 0;
3897         static int haswrapped = 0;
3898
3899         nextcbident++;
3900         if (nextcbident == 0)
3901                 haswrapped = 1;
3902         if (haswrapped) {
3903                 /*
3904                  * Search the clientid list for one already using this cbident.
3905                  */
3906                 do {
3907                         matched = 0;
3908                         NFSLOCKCLSTATE();
3909                         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3910                                 if (clp->nfsc_cbident == nextcbident) {
3911                                         matched = 1;
3912                                         break;
3913                                 }
3914                         }
3915                         NFSUNLOCKCLSTATE();
3916                         if (matched == 1)
3917                                 nextcbident++;
3918                 } while (matched);
3919         }
3920         return (nextcbident);
3921 }
3922
3923 /*
3924  * Get the mount point related to a given cbident or session and busy it.
3925  */
3926 static mount_t
3927 nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident,
3928     struct nfsclclient **clpp)
3929 {
3930         struct nfsclclient *clp;
3931         mount_t mp;
3932         int error;
3933         struct nfsclsession *tsep;
3934
3935         *clpp = NULL;
3936         NFSLOCKCLSTATE();
3937         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3938                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3939                 if (minorvers == NFSV4_MINORVERSION) {
3940                         if (clp->nfsc_cbident == cbident)
3941                                 break;
3942                 } else if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3943                     NFSX_V4SESSIONID))
3944                         break;
3945         }
3946         if (clp == NULL) {
3947                 NFSUNLOCKCLSTATE();
3948                 return (NULL);
3949         }
3950         mp = clp->nfsc_nmp->nm_mountp;
3951         vfs_ref(mp);
3952         NFSUNLOCKCLSTATE();
3953         error = vfs_busy(mp, 0);
3954         vfs_rel(mp);
3955         if (error != 0)
3956                 return (NULL);
3957         *clpp = clp;
3958         return (mp);
3959 }
3960
3961 /*
3962  * Get the clientid pointer related to a given cbident.
3963  */
3964 static struct nfsclclient *
3965 nfscl_getclnt(u_int32_t cbident)
3966 {
3967         struct nfsclclient *clp;
3968
3969         LIST_FOREACH(clp, &nfsclhead, nfsc_list)
3970                 if (clp->nfsc_cbident == cbident)
3971                         break;
3972         return (clp);
3973 }
3974
3975 /*
3976  * Get the clientid pointer related to a given sessionid.
3977  */
3978 static struct nfsclclient *
3979 nfscl_getclntsess(uint8_t *sessionid)
3980 {
3981         struct nfsclclient *clp;
3982         struct nfsclsession *tsep;
3983
3984         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3985                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3986                 if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3987                     NFSX_V4SESSIONID))
3988                         break;
3989         }
3990         return (clp);
3991 }
3992
3993 /*
3994  * Search for a lock conflict locally on the client. A conflict occurs if
3995  * - not same owner and overlapping byte range and at least one of them is
3996  *   a write lock or this is an unlock.
3997  */
3998 static int
3999 nfscl_localconflict(struct nfsclclient *clp, u_int8_t *fhp, int fhlen,
4000     struct nfscllock *nlop, u_int8_t *own, struct nfscldeleg *dp,
4001     struct nfscllock **lopp)
4002 {
4003         struct nfsclopen *op;
4004         int ret;
4005
4006         if (dp != NULL) {
4007                 ret = nfscl_checkconflict(&dp->nfsdl_lock, nlop, own, lopp);
4008                 if (ret)
4009                         return (ret);
4010         }
4011         LIST_FOREACH(op, NFSCLOPENHASH(clp, fhp, fhlen), nfso_hash) {
4012                 if (op->nfso_fhlen == fhlen &&
4013                     !NFSBCMP(op->nfso_fh, fhp, fhlen)) {
4014                         ret = nfscl_checkconflict(&op->nfso_lock, nlop,
4015                             own, lopp);
4016                         if (ret)
4017                                 return (ret);
4018                 }
4019         }
4020         return (0);
4021 }
4022
4023 static int
4024 nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop,
4025     u_int8_t *own, struct nfscllock **lopp)
4026 {
4027         struct nfscllockowner *lp;
4028         struct nfscllock *lop;
4029
4030         LIST_FOREACH(lp, lhp, nfsl_list) {
4031                 if (NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
4032                         LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
4033                                 if (lop->nfslo_first >= nlop->nfslo_end)
4034                                         break;
4035                                 if (lop->nfslo_end <= nlop->nfslo_first)
4036                                         continue;
4037                                 if (lop->nfslo_type == F_WRLCK ||
4038                                     nlop->nfslo_type == F_WRLCK ||
4039                                     nlop->nfslo_type == F_UNLCK) {
4040                                         if (lopp != NULL)
4041                                                 *lopp = lop;
4042                                         return (NFSERR_DENIED);
4043                                 }
4044                         }
4045                 }
4046         }
4047         return (0);
4048 }
4049
4050 /*
4051  * Check for a local conflicting lock.
4052  */
4053 int
4054 nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
4055     u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags)
4056 {
4057         struct nfscllock *lop, nlck;
4058         struct nfscldeleg *dp;
4059         struct nfsnode *np;
4060         u_int8_t own[NFSV4CL_LOCKNAMELEN];
4061         int error;
4062
4063         nlck.nfslo_type = fl->l_type;
4064         nlck.nfslo_first = off;
4065         if (len == NFS64BITSSET) {
4066                 nlck.nfslo_end = NFS64BITSSET;
4067         } else {
4068                 nlck.nfslo_end = off + len;
4069                 if (nlck.nfslo_end <= nlck.nfslo_first)
4070                         return (NFSERR_INVAL);
4071         }
4072         np = VTONFS(vp);
4073         nfscl_filllockowner(id, own, flags);
4074         NFSLOCKCLSTATE();
4075         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4076         error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
4077             &nlck, own, dp, &lop);
4078         if (error != 0) {
4079                 fl->l_whence = SEEK_SET;
4080                 fl->l_start = lop->nfslo_first;
4081                 if (lop->nfslo_end == NFS64BITSSET)
4082                         fl->l_len = 0;
4083                 else
4084                         fl->l_len = lop->nfslo_end - lop->nfslo_first;
4085                 fl->l_pid = (pid_t)0;
4086                 fl->l_type = lop->nfslo_type;
4087                 error = -1;                     /* no RPC required */
4088         } else if (dp != NULL && ((dp->nfsdl_flags & NFSCLDL_WRITE) ||
4089             fl->l_type == F_RDLCK)) {
4090                 /*
4091                  * The delegation ensures that there isn't a conflicting
4092                  * lock on the server, so return -1 to indicate an RPC
4093                  * isn't required.
4094                  */
4095                 fl->l_type = F_UNLCK;
4096                 error = -1;
4097         }
4098         NFSUNLOCKCLSTATE();
4099         return (error);
4100 }
4101
4102 /*
4103  * Handle Recall of a delegation.
4104  * The clp must be exclusive locked when this is called.
4105  */
4106 static int
4107 nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
4108     struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4109     int called_from_renewthread, vnode_t *vpp)
4110 {
4111         struct nfsclowner *owp, *lowp, *nowp;
4112         struct nfsclopen *op, *lop;
4113         struct nfscllockowner *lp;
4114         struct nfscllock *lckp;
4115         struct nfsnode *np;
4116         int error = 0, ret;
4117
4118         if (vp == NULL) {
4119                 KASSERT(vpp != NULL, ("nfscl_recalldeleg: vpp NULL"));
4120                 *vpp = NULL;
4121                 /*
4122                  * First, get a vnode for the file. This is needed to do RPCs.
4123                  */
4124                 ret = nfscl_ngetreopen(nmp->nm_mountp, dp->nfsdl_fh,
4125                     dp->nfsdl_fhlen, p, &np);
4126                 if (ret) {
4127                         /*
4128                          * File isn't open, so nothing to move over to the
4129                          * server.
4130                          */
4131                         return (0);
4132                 }
4133                 vp = NFSTOV(np);
4134                 *vpp = vp;
4135         } else {
4136                 np = VTONFS(vp);
4137         }
4138         dp->nfsdl_flags &= ~NFSCLDL_MODTIMESET;
4139
4140         /*
4141          * Ok, if it's a write delegation, flush data to the server, so
4142          * that close/open consistency is retained.
4143          */
4144         ret = 0;
4145         NFSLOCKNODE(np);
4146         if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) {
4147                 np->n_flag |= NDELEGRECALL;
4148                 NFSUNLOCKNODE(np);
4149                 ret = ncl_flush(vp, MNT_WAIT, p, 1, called_from_renewthread);
4150                 NFSLOCKNODE(np);
4151                 np->n_flag &= ~NDELEGRECALL;
4152         }
4153         NFSINVALATTRCACHE(np);
4154         NFSUNLOCKNODE(np);
4155         if (ret == EIO && called_from_renewthread != 0) {
4156                 /*
4157                  * If the flush failed with EIO for the renew thread,
4158                  * return now, so that the dirty buffer will be flushed
4159                  * later.
4160                  */
4161                 return (ret);
4162         }
4163
4164         /*
4165          * Now, for each openowner with opens issued locally, move them
4166          * over to state against the server.
4167          */
4168         LIST_FOREACH(lowp, &dp->nfsdl_owner, nfsow_list) {
4169                 lop = LIST_FIRST(&lowp->nfsow_open);
4170                 if (lop != NULL) {
4171                         if (LIST_NEXT(lop, nfso_list) != NULL)
4172                                 panic("nfsdlg mult opens");
4173                         /*
4174                          * Look for the same openowner against the server.
4175                          */
4176                         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
4177                                 if (!NFSBCMP(lowp->nfsow_owner,
4178                                     owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
4179                                         newnfs_copycred(&dp->nfsdl_cred, cred);
4180                                         ret = nfscl_moveopen(vp, clp, nmp, lop,
4181                                             owp, dp, cred, p);
4182                                         if (ret == NFSERR_STALECLIENTID ||
4183                                             ret == NFSERR_STALEDONTRECOVER ||
4184                                             ret == NFSERR_BADSESSION)
4185                                                 return (ret);
4186                                         if (ret) {
4187                                                 nfscl_freeopen(lop, 1);
4188                                                 if (!error)
4189                                                         error = ret;
4190                                         }
4191                                         break;
4192                                 }
4193                         }
4194
4195                         /*
4196                          * If no openowner found, create one and get an open
4197                          * for it.
4198                          */
4199                         if (owp == NULL) {
4200                                 nowp = malloc(
4201                                     sizeof (struct nfsclowner), M_NFSCLOWNER,
4202                                     M_WAITOK);
4203                                 nfscl_newopen(clp, NULL, &owp, &nowp, &op, 
4204                                     NULL, lowp->nfsow_owner, dp->nfsdl_fh,
4205                                     dp->nfsdl_fhlen, NULL, NULL);
4206                                 newnfs_copycred(&dp->nfsdl_cred, cred);
4207                                 ret = nfscl_moveopen(vp, clp, nmp, lop,
4208                                     owp, dp, cred, p);
4209                                 if (ret) {
4210                                         nfscl_freeopenowner(owp, 0);
4211                                         if (ret == NFSERR_STALECLIENTID ||
4212                                             ret == NFSERR_STALEDONTRECOVER ||
4213                                             ret == NFSERR_BADSESSION)
4214                                                 return (ret);
4215                                         if (ret) {
4216                                                 nfscl_freeopen(lop, 1);
4217                                                 if (!error)
4218                                                         error = ret;
4219                                         }
4220                                 }
4221                         }
4222                 }
4223         }
4224
4225         /*
4226          * Now, get byte range locks for any locks done locally.
4227          */
4228         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4229                 LIST_FOREACH(lckp, &lp->nfsl_lock, nfslo_list) {
4230                         newnfs_copycred(&dp->nfsdl_cred, cred);
4231                         ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p);
4232                         if (ret == NFSERR_STALESTATEID ||
4233                             ret == NFSERR_STALEDONTRECOVER ||
4234                             ret == NFSERR_STALECLIENTID ||
4235                             ret == NFSERR_BADSESSION)
4236                                 return (ret);
4237                         if (ret && !error)
4238                                 error = ret;
4239                 }
4240         }
4241         return (error);
4242 }
4243
4244 /*
4245  * Move a locally issued open over to an owner on the state list.
4246  * SIDE EFFECT: If it needs to sleep (do an rpc), it unlocks clstate and
4247  * returns with it unlocked.
4248  */
4249 static int
4250 nfscl_moveopen(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4251     struct nfsclopen *lop, struct nfsclowner *owp, struct nfscldeleg *dp,
4252     struct ucred *cred, NFSPROC_T *p)
4253 {
4254         struct nfsclopen *op, *nop;
4255         struct nfscldeleg *ndp;
4256         struct nfsnode *np;
4257         int error = 0, newone;
4258
4259         /*
4260          * First, look for an appropriate open, If found, just increment the
4261          * opencnt in it.
4262          */
4263         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
4264                 if ((op->nfso_mode & lop->nfso_mode) == lop->nfso_mode &&
4265                     op->nfso_fhlen == lop->nfso_fhlen &&
4266                     !NFSBCMP(op->nfso_fh, lop->nfso_fh, op->nfso_fhlen)) {
4267                         op->nfso_opencnt += lop->nfso_opencnt;
4268                         nfscl_freeopen(lop, 1);
4269                         return (0);
4270                 }
4271         }
4272
4273         /* No appropriate open, so we have to do one against the server. */
4274         np = VTONFS(vp);
4275         nop = malloc(sizeof (struct nfsclopen) +
4276             lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
4277         nop->nfso_hash.le_prev = NULL;
4278         newone = 0;
4279         nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner,
4280             lop->nfso_fh, lop->nfso_fhlen, cred, &newone);
4281         ndp = dp;
4282         error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen,
4283             lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op,
4284             NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &ndp, 0, 0, cred, p);
4285         if (error) {
4286                 if (newone)
4287                         nfscl_freeopen(op, 0);
4288         } else {
4289                 op->nfso_mode |= lop->nfso_mode;
4290                 op->nfso_opencnt += lop->nfso_opencnt;
4291                 nfscl_freeopen(lop, 1);
4292         }
4293         if (nop != NULL)
4294                 free(nop, M_NFSCLOPEN);
4295         if (ndp != NULL) {
4296                 /*
4297                  * What should I do with the returned delegation, since the
4298                  * delegation is being recalled? For now, just printf and
4299                  * through it away.
4300                  */
4301                 printf("Moveopen returned deleg\n");
4302                 free(ndp, M_NFSCLDELEG);
4303         }
4304         return (error);
4305 }
4306
4307 /*
4308  * Recall all delegations on this client.
4309  */
4310 static void
4311 nfscl_totalrecall(struct nfsclclient *clp)
4312 {
4313         struct nfscldeleg *dp;
4314
4315         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
4316                 if ((dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0)
4317                         dp->nfsdl_flags |= NFSCLDL_RECALL;
4318         }
4319 }
4320
4321 /*
4322  * Relock byte ranges. Called for delegation recall and state expiry.
4323  */
4324 static int
4325 nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4326     struct nfscllockowner *lp, struct nfscllock *lop, struct ucred *cred,
4327     NFSPROC_T *p)
4328 {
4329         struct nfscllockowner *nlp;
4330         struct nfsfh *nfhp;
4331         struct nfsnode *np;
4332         u_int64_t off, len;
4333         int error, newone, donelocally;
4334
4335         if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) {
4336                 np = VTONFS(vp);
4337                 NFSLOCKNODE(np);
4338                 np->n_flag |= NMIGHTBELOCKED;
4339                 NFSUNLOCKNODE(np);
4340         }
4341
4342         off = lop->nfslo_first;
4343         len = lop->nfslo_end - lop->nfslo_first;
4344         error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p,
4345             clp, 1, NULL, lp->nfsl_lockflags, lp->nfsl_owner,
4346             lp->nfsl_openowner, &nlp, &newone, &donelocally);
4347         if (error || donelocally)
4348                 return (error);
4349         nfhp = VTONFS(vp)->n_fhp;
4350         error = nfscl_trylock(nmp, vp, nfhp->nfh_fh,
4351             nfhp->nfh_len, nlp, newone, 0, off,
4352             len, lop->nfslo_type, cred, p);
4353         if (error)
4354                 nfscl_freelockowner(nlp, 0);
4355         return (error);
4356 }
4357
4358 /*
4359  * Called to re-open a file. Basically get a vnode for the file handle
4360  * and then call nfsrpc_openrpc() to do the rest.
4361  */
4362 static int
4363 nfsrpc_reopen(struct nfsmount *nmp, u_int8_t *fhp, int fhlen,
4364     u_int32_t mode, struct nfsclopen *op, struct nfscldeleg **dpp,
4365     struct ucred *cred, NFSPROC_T *p)
4366 {
4367         struct nfsnode *np;
4368         vnode_t vp;
4369         int error;
4370
4371         error = nfscl_ngetreopen(nmp->nm_mountp, fhp, fhlen, p, &np);
4372         if (error)
4373                 return (error);
4374         vp = NFSTOV(np);
4375         if (np->n_v4 != NULL) {
4376                 error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data,
4377                     np->n_v4->n4_fhlen, fhp, fhlen, mode, op,
4378                     NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, dpp, 0, 0,
4379                     cred, p);
4380         } else {
4381                 error = EINVAL;
4382         }
4383         vrele(vp);
4384         return (error);
4385 }
4386
4387 /*
4388  * Try an open against the server. Just call nfsrpc_openrpc(), retrying while
4389  * NFSERR_DELAY. Also, try system credentials, if the passed in credentials
4390  * fail.
4391  */
4392 static int
4393 nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
4394     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
4395     u_int8_t *name, int namelen, struct nfscldeleg **ndpp,
4396     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p)
4397 {
4398         int error;
4399
4400         do {
4401                 error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen,
4402                     mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p,
4403                     0, 0);
4404                 if (error == NFSERR_DELAY)
4405                         (void) nfs_catnap(PZERO, error, "nfstryop");
4406         } while (error == NFSERR_DELAY);
4407         if (error == EAUTH || error == EACCES) {
4408                 /* Try again using system credentials */
4409                 newnfs_setroot(cred);
4410                 do {
4411                     error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp,
4412                         newfhlen, mode, op, name, namelen, ndpp, reclaim,
4413                         delegtype, cred, p, 1, 0);
4414                     if (error == NFSERR_DELAY)
4415                         (void) nfs_catnap(PZERO, error, "nfstryop");
4416                 } while (error == NFSERR_DELAY);
4417         }
4418         return (error);
4419 }
4420
4421 /*
4422  * Try a byte range lock. Just loop on nfsrpc_lock() while it returns
4423  * NFSERR_DELAY. Also, retry with system credentials, if the provided
4424  * cred don't work.
4425  */
4426 static int
4427 nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp,
4428     int fhlen, struct nfscllockowner *nlp, int newone, int reclaim,
4429     u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p)
4430 {
4431         struct nfsrv_descript nfsd, *nd = &nfsd;
4432         int error;
4433
4434         do {
4435                 error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone,
4436                     reclaim, off, len, type, cred, p, 0);
4437                 if (!error && nd->nd_repstat == NFSERR_DELAY)
4438                         (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4439                             "nfstrylck");
4440         } while (!error && nd->nd_repstat == NFSERR_DELAY);
4441         if (!error)
4442                 error = nd->nd_repstat;
4443         if (error == EAUTH || error == EACCES) {
4444                 /* Try again using root credentials */
4445                 newnfs_setroot(cred);
4446                 do {
4447                         error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp,
4448                             newone, reclaim, off, len, type, cred, p, 1);
4449                         if (!error && nd->nd_repstat == NFSERR_DELAY)
4450                                 (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4451                                     "nfstrylck");
4452                 } while (!error && nd->nd_repstat == NFSERR_DELAY);
4453                 if (!error)
4454                         error = nd->nd_repstat;
4455         }
4456         return (error);
4457 }
4458
4459 /*
4460  * Try a delegreturn against the server. Just call nfsrpc_delegreturn(),
4461  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4462  * credentials fail.
4463  */
4464 int
4465 nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred,
4466     struct nfsmount *nmp, NFSPROC_T *p)
4467 {
4468         int error;
4469
4470         do {
4471                 error = nfsrpc_delegreturn(dp, cred, nmp, p, 0);
4472                 if (error == NFSERR_DELAY)
4473                         (void) nfs_catnap(PZERO, error, "nfstrydp");
4474         } while (error == NFSERR_DELAY);
4475         if (error == EAUTH || error == EACCES) {
4476                 /* Try again using system credentials */
4477                 newnfs_setroot(cred);
4478                 do {
4479                         error = nfsrpc_delegreturn(dp, cred, nmp, p, 1);
4480                         if (error == NFSERR_DELAY)
4481                                 (void) nfs_catnap(PZERO, error, "nfstrydp");
4482                 } while (error == NFSERR_DELAY);
4483         }
4484         return (error);
4485 }
4486
4487 /*
4488  * Try a close against the server. Just call nfsrpc_closerpc(),
4489  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4490  * credentials fail.
4491  */
4492 int
4493 nfscl_tryclose(struct nfsclopen *op, struct ucred *cred,
4494     struct nfsmount *nmp, NFSPROC_T *p)
4495 {
4496         struct nfsrv_descript nfsd, *nd = &nfsd;
4497         int error;
4498
4499         do {
4500                 error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0);
4501                 if (error == NFSERR_DELAY)
4502                         (void) nfs_catnap(PZERO, error, "nfstrycl");
4503         } while (error == NFSERR_DELAY);
4504         if (error == EAUTH || error == EACCES) {
4505                 /* Try again using system credentials */
4506                 newnfs_setroot(cred);
4507                 do {
4508                         error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1);
4509                         if (error == NFSERR_DELAY)
4510                                 (void) nfs_catnap(PZERO, error, "nfstrycl");
4511                 } while (error == NFSERR_DELAY);
4512         }
4513         return (error);
4514 }
4515
4516 /*
4517  * Decide if a delegation on a file permits close without flushing writes
4518  * to the server. This might be a big performance win in some environments.
4519  * (Not useful until the client does caching on local stable storage.)
4520  */
4521 int
4522 nfscl_mustflush(vnode_t vp)
4523 {
4524         struct nfsclclient *clp;
4525         struct nfscldeleg *dp;
4526         struct nfsnode *np;
4527         struct nfsmount *nmp;
4528
4529         np = VTONFS(vp);
4530         nmp = VFSTONFS(vp->v_mount);
4531         if (!NFSHASNFSV4(nmp))
4532                 return (1);
4533         NFSLOCKCLSTATE();
4534         clp = nfscl_findcl(nmp);
4535         if (clp == NULL) {
4536                 NFSUNLOCKCLSTATE();
4537                 return (1);
4538         }
4539         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4540         if (dp != NULL && (dp->nfsdl_flags &
4541             (NFSCLDL_WRITE | NFSCLDL_RECALL | NFSCLDL_DELEGRET)) ==
4542              NFSCLDL_WRITE &&
4543             (dp->nfsdl_sizelimit >= np->n_size ||
4544              !NFSHASSTRICT3530(nmp))) {
4545                 NFSUNLOCKCLSTATE();
4546                 return (0);
4547         }
4548         NFSUNLOCKCLSTATE();
4549         return (1);
4550 }
4551
4552 /*
4553  * See if a (write) delegation exists for this file.
4554  */
4555 int
4556 nfscl_nodeleg(vnode_t vp, int writedeleg)
4557 {
4558         struct nfsclclient *clp;
4559         struct nfscldeleg *dp;
4560         struct nfsnode *np;
4561         struct nfsmount *nmp;
4562
4563         np = VTONFS(vp);
4564         nmp = VFSTONFS(vp->v_mount);
4565         if (!NFSHASNFSV4(nmp))
4566                 return (1);
4567         NFSLOCKMNT(nmp);
4568         if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
4569                 NFSUNLOCKMNT(nmp);
4570                 return (1);
4571         }
4572         NFSUNLOCKMNT(nmp);
4573         NFSLOCKCLSTATE();
4574         clp = nfscl_findcl(nmp);
4575         if (clp == NULL) {
4576                 NFSUNLOCKCLSTATE();
4577                 return (1);
4578         }
4579         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4580         if (dp != NULL &&
4581             (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0 &&
4582             (writedeleg == 0 || (dp->nfsdl_flags & NFSCLDL_WRITE) ==
4583              NFSCLDL_WRITE)) {
4584                 NFSUNLOCKCLSTATE();
4585                 return (0);
4586         }
4587         NFSUNLOCKCLSTATE();
4588         return (1);
4589 }
4590
4591 /*
4592  * Look for an associated delegation that should be DelegReturned.
4593  */
4594 int
4595 nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp)
4596 {
4597         struct nfsclclient *clp;
4598         struct nfscldeleg *dp;
4599         struct nfsclowner *owp;
4600         struct nfscllockowner *lp;
4601         struct nfsmount *nmp;
4602         struct ucred *cred;
4603         struct nfsnode *np;
4604         int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4605
4606         nmp = VFSTONFS(vp->v_mount);
4607         np = VTONFS(vp);
4608         NFSLOCKCLSTATE();
4609         /*
4610          * Loop around waiting for:
4611          * - outstanding I/O operations on delegations to complete
4612          * - for a delegation on vp that has state, lock the client and
4613          *   do a recall
4614          * - return delegation with no state
4615          */
4616         while (1) {
4617                 clp = nfscl_findcl(nmp);
4618                 if (clp == NULL) {
4619                         NFSUNLOCKCLSTATE();
4620                         return (retcnt);
4621                 }
4622                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4623                     np->n_fhp->nfh_len);
4624                 if (dp != NULL) {
4625                     /*
4626                      * Wait for outstanding I/O ops to be done.
4627                      */
4628                     if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4629                         if (igotlock) {
4630                             nfsv4_unlock(&clp->nfsc_lock, 0);
4631                             igotlock = 0;
4632                         }
4633                         dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4634                         (void) nfsmsleep(&dp->nfsdl_rwlock,
4635                             NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4636                         continue;
4637                     }
4638                     needsrecall = 0;
4639                     LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4640                         if (!LIST_EMPTY(&owp->nfsow_open)) {
4641                             needsrecall = 1;
4642                             break;
4643                         }
4644                     }
4645                     if (!needsrecall) {
4646                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4647                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4648                                 needsrecall = 1;
4649                                 break;
4650                             }
4651                         }
4652                     }
4653                     if (needsrecall && !triedrecall) {
4654                         dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4655                         islept = 0;
4656                         while (!igotlock) {
4657                             igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4658                                 &islept, NFSCLSTATEMUTEXPTR, NULL);
4659                             if (islept)
4660                                 break;
4661                         }
4662                         if (islept)
4663                             continue;
4664                         NFSUNLOCKCLSTATE();
4665                         cred = newnfs_getcred();
4666                         newnfs_copycred(&dp->nfsdl_cred, cred);
4667                         nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0, NULL);
4668                         NFSFREECRED(cred);
4669                         triedrecall = 1;
4670                         NFSLOCKCLSTATE();
4671                         nfsv4_unlock(&clp->nfsc_lock, 0);
4672                         igotlock = 0;
4673                         continue;
4674                     }
4675                     *stp = dp->nfsdl_stateid;
4676                     retcnt = 1;
4677                     nfscl_cleandeleg(dp);
4678                     nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4679                 }
4680                 if (igotlock)
4681                     nfsv4_unlock(&clp->nfsc_lock, 0);
4682                 NFSUNLOCKCLSTATE();
4683                 return (retcnt);
4684         }
4685 }
4686
4687 /*
4688  * Look for associated delegation(s) that should be DelegReturned.
4689  */
4690 int
4691 nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp,
4692     nfsv4stateid_t *tstp, int *gottdp, NFSPROC_T *p)
4693 {
4694         struct nfsclclient *clp;
4695         struct nfscldeleg *dp;
4696         struct nfsclowner *owp;
4697         struct nfscllockowner *lp;
4698         struct nfsmount *nmp;
4699         struct ucred *cred;
4700         struct nfsnode *np;
4701         int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4702
4703         nmp = VFSTONFS(fvp->v_mount);
4704         *gotfdp = 0;
4705         *gottdp = 0;
4706         NFSLOCKCLSTATE();
4707         /*
4708          * Loop around waiting for:
4709          * - outstanding I/O operations on delegations to complete
4710          * - for a delegation on fvp that has state, lock the client and
4711          *   do a recall
4712          * - return delegation(s) with no state.
4713          */
4714         while (1) {
4715                 clp = nfscl_findcl(nmp);
4716                 if (clp == NULL) {
4717                         NFSUNLOCKCLSTATE();
4718                         return (retcnt);
4719                 }
4720                 np = VTONFS(fvp);
4721                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4722                     np->n_fhp->nfh_len);
4723                 if (dp != NULL && *gotfdp == 0) {
4724                     /*
4725                      * Wait for outstanding I/O ops to be done.
4726                      */
4727                     if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4728                         if (igotlock) {
4729                             nfsv4_unlock(&clp->nfsc_lock, 0);
4730                             igotlock = 0;
4731                         }
4732                         dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4733                         (void) nfsmsleep(&dp->nfsdl_rwlock,
4734                             NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4735                         continue;
4736                     }
4737                     needsrecall = 0;
4738                     LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4739                         if (!LIST_EMPTY(&owp->nfsow_open)) {
4740                             needsrecall = 1;
4741                             break;
4742                         }
4743                     }
4744                     if (!needsrecall) {
4745                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4746                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4747                                 needsrecall = 1;
4748                                 break;
4749                             }
4750                         }
4751                     }
4752                     if (needsrecall && !triedrecall) {
4753                         dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4754                         islept = 0;
4755                         while (!igotlock) {
4756                             igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4757                                 &islept, NFSCLSTATEMUTEXPTR, NULL);
4758                             if (islept)
4759                                 break;
4760                         }
4761                         if (islept)
4762                             continue;
4763                         NFSUNLOCKCLSTATE();
4764                         cred = newnfs_getcred();
4765                         newnfs_copycred(&dp->nfsdl_cred, cred);
4766                         nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0, NULL);
4767                         NFSFREECRED(cred);
4768                         triedrecall = 1;
4769                         NFSLOCKCLSTATE();
4770                         nfsv4_unlock(&clp->nfsc_lock, 0);
4771                         igotlock = 0;
4772                         continue;
4773                     }
4774                     *fstp = dp->nfsdl_stateid;
4775                     retcnt++;
4776                     *gotfdp = 1;
4777                     nfscl_cleandeleg(dp);
4778                     nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4779                 }
4780                 if (igotlock) {
4781                     nfsv4_unlock(&clp->nfsc_lock, 0);
4782                     igotlock = 0;
4783                 }
4784                 if (tvp != NULL) {
4785                     np = VTONFS(tvp);
4786                     dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4787                         np->n_fhp->nfh_len);
4788                     if (dp != NULL && *gottdp == 0) {
4789                         /*
4790                          * Wait for outstanding I/O ops to be done.
4791                          */
4792                         if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4793                             dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4794                             (void) nfsmsleep(&dp->nfsdl_rwlock,
4795                                 NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4796                             continue;
4797                         }
4798                         LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4799                             if (!LIST_EMPTY(&owp->nfsow_open)) {
4800                                 NFSUNLOCKCLSTATE();
4801                                 return (retcnt);
4802                             }
4803                         }
4804                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4805                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4806                                 NFSUNLOCKCLSTATE();
4807                                 return (retcnt);
4808                             }
4809                         }
4810                         *tstp = dp->nfsdl_stateid;
4811                         retcnt++;
4812                         *gottdp = 1;
4813                         nfscl_cleandeleg(dp);
4814                         nfscl_freedeleg(&clp->nfsc_deleg, dp, true);
4815                     }
4816                 }
4817                 NFSUNLOCKCLSTATE();
4818                 return (retcnt);
4819         }
4820 }
4821
4822 /*
4823  * Get a reference on the clientid associated with the mount point.
4824  * Return 1 if success, 0 otherwise.
4825  */
4826 int
4827 nfscl_getref(struct nfsmount *nmp)
4828 {
4829         struct nfsclclient *clp;
4830
4831         NFSLOCKCLSTATE();
4832         clp = nfscl_findcl(nmp);
4833         if (clp == NULL) {
4834                 NFSUNLOCKCLSTATE();
4835                 return (0);
4836         }
4837         nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, NULL);
4838         NFSUNLOCKCLSTATE();
4839         return (1);
4840 }
4841
4842 /*
4843  * Release a reference on a clientid acquired with the above call.
4844  */
4845 void
4846 nfscl_relref(struct nfsmount *nmp)
4847 {
4848         struct nfsclclient *clp;
4849
4850         NFSLOCKCLSTATE();
4851         clp = nfscl_findcl(nmp);
4852         if (clp == NULL) {
4853                 NFSUNLOCKCLSTATE();
4854                 return;
4855         }
4856         nfsv4_relref(&clp->nfsc_lock);
4857         NFSUNLOCKCLSTATE();
4858 }
4859
4860 /*
4861  * Save the size attribute in the delegation, since the nfsnode
4862  * is going away.
4863  */
4864 void
4865 nfscl_reclaimnode(vnode_t vp)
4866 {
4867         struct nfsclclient *clp;
4868         struct nfscldeleg *dp;
4869         struct nfsnode *np = VTONFS(vp);
4870         struct nfsmount *nmp;
4871
4872         nmp = VFSTONFS(vp->v_mount);
4873         if (!NFSHASNFSV4(nmp))
4874                 return;
4875         NFSLOCKCLSTATE();
4876         clp = nfscl_findcl(nmp);
4877         if (clp == NULL) {
4878                 NFSUNLOCKCLSTATE();
4879                 return;
4880         }
4881         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4882         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4883                 dp->nfsdl_size = np->n_size;
4884         NFSUNLOCKCLSTATE();
4885 }
4886
4887 /*
4888  * Get the saved size attribute in the delegation, since it is a
4889  * newly allocated nfsnode.
4890  */
4891 void
4892 nfscl_newnode(vnode_t vp)
4893 {
4894         struct nfsclclient *clp;
4895         struct nfscldeleg *dp;
4896         struct nfsnode *np = VTONFS(vp);
4897         struct nfsmount *nmp;
4898
4899         nmp = VFSTONFS(vp->v_mount);
4900         if (!NFSHASNFSV4(nmp))
4901                 return;
4902         NFSLOCKCLSTATE();
4903         clp = nfscl_findcl(nmp);
4904         if (clp == NULL) {
4905                 NFSUNLOCKCLSTATE();
4906                 return;
4907         }
4908         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4909         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4910                 np->n_size = dp->nfsdl_size;
4911         NFSUNLOCKCLSTATE();
4912 }
4913
4914 /*
4915  * If there is a valid write delegation for this file, set the modtime
4916  * to the local clock time.
4917  */
4918 void
4919 nfscl_delegmodtime(vnode_t vp)
4920 {
4921         struct nfsclclient *clp;
4922         struct nfscldeleg *dp;
4923         struct nfsnode *np = VTONFS(vp);
4924         struct nfsmount *nmp;
4925
4926         nmp = VFSTONFS(vp->v_mount);
4927         if (!NFSHASNFSV4(nmp))
4928                 return;
4929         NFSLOCKMNT(nmp);
4930         if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
4931                 NFSUNLOCKMNT(nmp);
4932                 return;
4933         }
4934         NFSUNLOCKMNT(nmp);
4935         NFSLOCKCLSTATE();
4936         clp = nfscl_findcl(nmp);
4937         if (clp == NULL) {
4938                 NFSUNLOCKCLSTATE();
4939                 return;
4940         }
4941         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4942         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) {
4943                 nanotime(&dp->nfsdl_modtime);
4944                 dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
4945         }
4946         NFSUNLOCKCLSTATE();
4947 }
4948
4949 /*
4950  * If there is a valid write delegation for this file with a modtime set,
4951  * put that modtime in mtime.
4952  */
4953 void
4954 nfscl_deleggetmodtime(vnode_t vp, struct timespec *mtime)
4955 {
4956         struct nfsclclient *clp;
4957         struct nfscldeleg *dp;
4958         struct nfsnode *np = VTONFS(vp);
4959         struct nfsmount *nmp;
4960
4961         nmp = VFSTONFS(vp->v_mount);
4962         if (!NFSHASNFSV4(nmp))
4963                 return;
4964         NFSLOCKMNT(nmp);
4965         if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0) {
4966                 NFSUNLOCKMNT(nmp);
4967                 return;
4968         }
4969         NFSUNLOCKMNT(nmp);
4970         NFSLOCKCLSTATE();
4971         clp = nfscl_findcl(nmp);
4972         if (clp == NULL) {
4973                 NFSUNLOCKCLSTATE();
4974                 return;
4975         }
4976         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4977         if (dp != NULL &&
4978             (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) ==
4979             (NFSCLDL_WRITE | NFSCLDL_MODTIMESET))
4980                 *mtime = dp->nfsdl_modtime;
4981         NFSUNLOCKCLSTATE();
4982 }
4983
4984 static int
4985 nfscl_errmap(struct nfsrv_descript *nd, u_int32_t minorvers)
4986 {
4987         short *defaulterrp, *errp;
4988
4989         if (!nd->nd_repstat)
4990                 return (0);
4991         if (nd->nd_procnum == NFSPROC_NOOP)
4992                 return (txdr_unsigned(nd->nd_repstat & 0xffff));
4993         if (nd->nd_repstat == EBADRPC)
4994                 return (txdr_unsigned(NFSERR_BADXDR));
4995         if (nd->nd_repstat == NFSERR_MINORVERMISMATCH ||
4996             nd->nd_repstat == NFSERR_OPILLEGAL)
4997                 return (txdr_unsigned(nd->nd_repstat));
4998         if (nd->nd_repstat >= NFSERR_BADIOMODE && nd->nd_repstat < 20000 &&
4999             minorvers > NFSV4_MINORVERSION) {
5000                 /* NFSv4.n error. */
5001                 return (txdr_unsigned(nd->nd_repstat));
5002         }
5003         if (nd->nd_procnum < NFSV4OP_CBNOPS)
5004                 errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum];
5005         else
5006                 return (txdr_unsigned(nd->nd_repstat));
5007         while (*++errp)
5008                 if (*errp == (short)nd->nd_repstat)
5009                         return (txdr_unsigned(nd->nd_repstat));
5010         return (txdr_unsigned(*defaulterrp));
5011 }
5012
/*
 * Called to find/add a layout to a client.
 * This function returns the layout with a refcnt (shared lock) upon
 * success (returns 0) or with no lock/refcnt on the layout when an
 * error is returned.
 * If a layout is passed in via lypp, it is locked (exclusively locked).
 *
 * Arguments, as used by the code below:
 * - nmp: the mount; nmp->nm_clp supplies the client and nmp->nm_mountp is
 *   passed to nfsv4_getref() and NFSCL_FORCEDISM().
 * - vp: only used to copy na_filesid[] from its nfsnode into a newly
 *   created layout.
 * - fhp/fhlen: file handle used for the lookup and copied into a newly
 *   created layout.
 * - stateidp: layout stateid; copied into a new layout, or used to raise
 *   the seqid of an existing one when it is numerically larger.
 * - layouttype: NFSLAYOUT_FLEXFILE selects NFSLY_FLEXFILE, anything else
 *   selects NFSLY_FILES (only consulted when a layout is created).
 * - retonclose: non-zero sets NFSLY_RETONCLOSE on a created or found layout.
 * - fhlp: list of file layouts merged into the layout's read or rw list,
 *   chosen by the iomode of the first entry.
 * - lypp: in/out. NULL on entry means find or create the layout and return
 *   it through *lypp; non-NULL means that layout is used as is.
 * - cred, p: not referenced in this function body.
 *
 * Returns 0 on success, or EPERM when nmp->nm_clp is NULL or when
 * NFSCL_FORCEDISM(mp) is true after the nfsv4_getref() call.
 */
int
nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
    nfsv4stateid_t *stateidp, int layouttype, int retonclose,
    struct nfsclflayouthead *fhlp, struct nfscllayout **lypp,
    struct ucred *cred, NFSPROC_T *p)
{
        struct nfsclclient *clp;
        struct nfscllayout *lyp, *tlyp;
        struct nfsclflayout *flp;
        struct nfsnode *np = VTONFS(vp);
        mount_t mp;
        int layout_passed_in;

        mp = nmp->nm_mountp;
        layout_passed_in = 1;
        tlyp = NULL;
        lyp = *lypp;
        if (lyp == NULL) {
                layout_passed_in = 0;
                /*
                 * Pre-allocate a candidate layout before the state lock is
                 * taken (M_WAITOK allocation; presumably done here so it is
                 * not performed while holding the mutex). The "fhlen - 1"
                 * sizing presumably accounts for a one byte trailing
                 * nfsly_fh[] array - confirm against the structure.
                 */
                tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT,
                    M_WAITOK | M_ZERO);
        }

        NFSLOCKCLSTATE();
        clp = nmp->nm_clp;
        if (clp == NULL) {
                /* No client: drop the caller's lock, if any, and fail. */
                if (layout_passed_in != 0)
                        nfsv4_unlock(&lyp->nfsly_lock, 0);
                NFSUNLOCKCLSTATE();
                if (tlyp != NULL)
                        free(tlyp, M_NFSLAYOUT);
                return (EPERM);
        }
        if (lyp == NULL) {
                /*
                 * Although no lyp was passed in, another thread might have
                 * allocated one. If one is found, just increment its ref
                 * count and return it.
                 */
                lyp = nfscl_findlayout(clp, fhp, fhlen);
                if (lyp == NULL) {
                        /*
                         * Not found: initialize the pre-allocated one and
                         * link it onto the client's layout list and hash
                         * chain. tlyp is cleared so it is not freed below.
                         */
                        lyp = tlyp;
                        tlyp = NULL;
                        lyp->nfsly_stateid.seqid = stateidp->seqid;
                        lyp->nfsly_stateid.other[0] = stateidp->other[0];
                        lyp->nfsly_stateid.other[1] = stateidp->other[1];
                        lyp->nfsly_stateid.other[2] = stateidp->other[2];
                        lyp->nfsly_lastbyte = 0;
                        LIST_INIT(&lyp->nfsly_flayread);
                        LIST_INIT(&lyp->nfsly_flayrw);
                        LIST_INIT(&lyp->nfsly_recall);
                        lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0];
                        lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1];
                        lyp->nfsly_clp = clp;
                        if (layouttype == NFSLAYOUT_FLEXFILE)
                                lyp->nfsly_flags = NFSLY_FLEXFILE;
                        else
                                lyp->nfsly_flags = NFSLY_FILES;
                        if (retonclose != 0)
                                lyp->nfsly_flags |= NFSLY_RETONCLOSE;
                        lyp->nfsly_fhlen = fhlen;
                        NFSBCOPY(fhp, lyp->nfsly_fh, fhlen);
                        TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
                        LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp,
                            nfsly_hash);
                        lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
                        nfscl_layoutcnt++;
                } else {
                        /*
                         * Found an existing layout: update it and move it
                         * to the head of the client's layout list.
                         */
                        if (retonclose != 0)
                                lyp->nfsly_flags |= NFSLY_RETONCLOSE;
                        if (stateidp->seqid > lyp->nfsly_stateid.seqid)
                                lyp->nfsly_stateid.seqid = stateidp->seqid;
                        TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
                        TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
                        lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
                }
                nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
                if (NFSCL_FORCEDISM(mp)) {
                        /*
                         * The layout, if newly created, stays on the
                         * client's lists; only the unused pre-allocation
                         * is freed here.
                         */
                        NFSUNLOCKCLSTATE();
                        if (tlyp != NULL)
                                free(tlyp, M_NFSLAYOUT);
                        return (EPERM);
                }
                *lypp = lyp;
        } else if (stateidp->seqid > lyp->nfsly_stateid.seqid)
                lyp->nfsly_stateid.seqid = stateidp->seqid;

        /* Merge the new list of File Layouts into the list. */
        flp = LIST_FIRST(fhlp);
        if (flp != NULL) {
                if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ)
                        nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp);
                else
                        nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp);
        }
        /*
         * For a passed in layout, release the lock with a second argument
         * of 1 (the error path above uses 0); presumably this leaves the
         * caller holding a reference, matching the contract in the header
         * comment - confirm against nfsv4_unlock().
         */
        if (layout_passed_in != 0)
                nfsv4_unlock(&lyp->nfsly_lock, 1);
        NFSUNLOCKCLSTATE();
        if (tlyp != NULL)
                free(tlyp, M_NFSLAYOUT);
        return (0);
}
5122
5123 /*
5124  * Search for a layout by MDS file handle.
5125  * If one is found, it is returned with a refcnt (shared lock) iff
5126  * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is
5127  * returned NULL.
5128  */
5129 struct nfscllayout *
5130 nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen,
5131     uint64_t off, struct nfsclflayout **retflpp, int *recalledp)
5132 {
5133         struct nfscllayout *lyp;
5134         mount_t mp;
5135         int error, igotlock;
5136
5137         mp = clp->nfsc_nmp->nm_mountp;
5138         *recalledp = 0;
5139         *retflpp = NULL;
5140         NFSLOCKCLSTATE();
5141         lyp = nfscl_findlayout(clp, fhp, fhlen);
5142         if (lyp != NULL) {
5143                 if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5144                         TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
5145                         TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
5146                         lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
5147                         error = nfscl_findlayoutforio(lyp, off,
5148                             NFSV4OPEN_ACCESSREAD, retflpp);
5149                         if (error == 0)
5150                                 nfsv4_getref(&lyp->nfsly_lock, NULL,
5151                                     NFSCLSTATEMUTEXPTR, mp);
5152                         else {
5153                                 do {
5154                                         igotlock = nfsv4_lock(&lyp->nfsly_lock,
5155                                             1, NULL, NFSCLSTATEMUTEXPTR, mp);
5156                                 } while (igotlock == 0 && !NFSCL_FORCEDISM(mp));
5157                                 *retflpp = NULL;
5158                         }
5159                         if (NFSCL_FORCEDISM(mp)) {
5160                                 lyp = NULL;
5161                                 *recalledp = 1;
5162                         }
5163                 } else {
5164                         lyp = NULL;
5165                         *recalledp = 1;
5166                 }
5167         }
5168         NFSUNLOCKCLSTATE();
5169         return (lyp);
5170 }
5171
5172 /*
5173  * Search for a layout by MDS file handle. If one is found, mark in to be
5174  * recalled, if it already marked "return on close".
5175  */
5176 static void
5177 nfscl_retoncloselayout(vnode_t vp, struct nfsclclient *clp, uint8_t *fhp,
5178     int fhlen, struct nfsclrecalllayout **recallpp)
5179 {
5180         struct nfscllayout *lyp;
5181         uint32_t iomode;
5182
5183         if (vp->v_type != VREG || !NFSHASPNFS(VFSTONFS(vp->v_mount)) ||
5184             nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5185             (VTONFS(vp)->n_flag & NNOLAYOUT) != 0)
5186                 return;
5187         lyp = nfscl_findlayout(clp, fhp, fhlen);
5188         if (lyp != NULL && (lyp->nfsly_flags & (NFSLY_RETONCLOSE |
5189             NFSLY_RECALL)) == NFSLY_RETONCLOSE) {
5190                 iomode = 0;
5191                 if (!LIST_EMPTY(&lyp->nfsly_flayread))
5192                         iomode |= NFSLAYOUTIOMODE_READ;
5193                 if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5194                         iomode |= NFSLAYOUTIOMODE_RW;
5195                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5196                     0, UINT64_MAX, lyp->nfsly_stateid.seqid, 0, 0, NULL,
5197                     *recallpp);
5198                 NFSCL_DEBUG(4, "retoncls recall iomode=%d\n", iomode);
5199                 *recallpp = NULL;
5200         }
5201 }
5202
5203 /*
5204  * Mark the layout to be recalled and with an error.
5205  * Also, disable the dsp from further use.
5206  */
5207 void
5208 nfscl_dserr(uint32_t op, uint32_t stat, struct nfscldevinfo *dp,
5209     struct nfscllayout *lyp, struct nfsclds *dsp)
5210 {
5211         struct nfsclrecalllayout *recallp;
5212         uint32_t iomode;
5213
5214         printf("DS being disabled, error=%d\n", stat);
5215         /* Set up the return of the layout. */
5216         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
5217         iomode = 0;
5218         NFSLOCKCLSTATE();
5219         if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5220                 if (!LIST_EMPTY(&lyp->nfsly_flayread))
5221                         iomode |= NFSLAYOUTIOMODE_READ;
5222                 if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5223                         iomode |= NFSLAYOUTIOMODE_RW;
5224                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5225                     0, UINT64_MAX, lyp->nfsly_stateid.seqid, stat, op,
5226                     dp->nfsdi_deviceid, recallp);
5227                 NFSUNLOCKCLSTATE();
5228                 NFSCL_DEBUG(4, "nfscl_dserr recall iomode=%d\n", iomode);
5229         } else {
5230                 NFSUNLOCKCLSTATE();
5231                 free(recallp, M_NFSLAYRECALL);
5232         }
5233
5234         /* And shut the TCP connection down. */
5235         nfscl_cancelreqs(dsp);
5236 }
5237
5238 /*
5239  * Cancel all RPCs for this "dsp" by closing the connection.
5240  * Also, mark the session as defunct.
5241  * If NFSCLDS_SAMECONN is set, the connection is shared with other DSs and
5242  * cannot be shut down.
5243  */
5244 void
5245 nfscl_cancelreqs(struct nfsclds *dsp)
5246 {
5247         struct __rpc_client *cl;
5248         static int non_event;
5249
5250         NFSLOCKDS(dsp);
5251         if ((dsp->nfsclds_flags & (NFSCLDS_CLOSED | NFSCLDS_SAMECONN)) == 0 &&
5252             dsp->nfsclds_sockp != NULL &&
5253             dsp->nfsclds_sockp->nr_client != NULL) {
5254                 dsp->nfsclds_flags |= NFSCLDS_CLOSED;
5255                 cl = dsp->nfsclds_sockp->nr_client;
5256                 dsp->nfsclds_sess.nfsess_defunct = 1;
5257                 NFSUNLOCKDS(dsp);
5258                 CLNT_CLOSE(cl);
5259                 /*
5260                  * This 1sec sleep is done to reduce the number of reconnect
5261                  * attempts made on the DS while it has failed.
5262                  */
5263                 tsleep(&non_event, PVFS, "ndscls", hz);
5264                 return;
5265         }
5266         NFSUNLOCKDS(dsp);
5267 }
5268
5269 /*
5270  * Dereference a layout.
5271  */
5272 void
5273 nfscl_rellayout(struct nfscllayout *lyp, int exclocked)
5274 {
5275
5276         NFSLOCKCLSTATE();
5277         if (exclocked != 0)
5278                 nfsv4_unlock(&lyp->nfsly_lock, 0);
5279         else
5280                 nfsv4_relref(&lyp->nfsly_lock);
5281         NFSUNLOCKCLSTATE();
5282 }
5283
5284 /*
5285  * Search for a devinfo by deviceid. If one is found, return it after
5286  * acquiring a reference count on it.
5287  */
5288 struct nfscldevinfo *
5289 nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid,
5290     struct nfscldevinfo *dip)
5291 {
5292
5293         NFSLOCKCLSTATE();
5294         if (dip == NULL)
5295                 dip = nfscl_finddevinfo(clp, deviceid);
5296         if (dip != NULL)
5297                 dip->nfsdi_refcnt++;
5298         NFSUNLOCKCLSTATE();
5299         return (dip);
5300 }
5301
5302 /*
5303  * Dereference a devinfo structure.
5304  */
5305 static void
5306 nfscl_reldevinfo_locked(struct nfscldevinfo *dip)
5307 {
5308
5309         dip->nfsdi_refcnt--;
5310         if (dip->nfsdi_refcnt == 0)
5311                 wakeup(&dip->nfsdi_refcnt);
5312 }
5313
/*
 * Dereference a devinfo structure.
 * Takes the client state lock (NFSLOCKCLSTATE) around a call to
 * nfscl_reldevinfo_locked(), which decrements nfsdi_refcnt and does a
 * wakeup() on the counter when it reaches zero.
 */
void
nfscl_reldevinfo(struct nfscldevinfo *dip)
{

        NFSLOCKCLSTATE();
        nfscl_reldevinfo_locked(dip);
        NFSUNLOCKCLSTATE();
}
5325
5326 /*
5327  * Find a layout for this file handle. Return NULL upon failure.
5328  */
5329 static struct nfscllayout *
5330 nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
5331 {
5332         struct nfscllayout *lyp;
5333
5334         LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash)
5335                 if (lyp->nfsly_fhlen == fhlen &&
5336                     !NFSBCMP(lyp->nfsly_fh, fhp, fhlen))
5337                         break;
5338         return (lyp);
5339 }
5340
5341 /*
5342  * Find a devinfo for this deviceid. Return NULL upon failure.
5343  */
5344 static struct nfscldevinfo *
5345 nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid)
5346 {
5347         struct nfscldevinfo *dip;
5348
5349         LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list)
5350                 if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID)
5351                     == 0)
5352                         break;
5353         return (dip);
5354 }
5355
5356 /*
5357  * Merge the new file layout list into the main one, maintaining it in
5358  * increasing offset order.
5359  */
5360 static void
5361 nfscl_mergeflayouts(struct nfsclflayouthead *fhlp,
5362     struct nfsclflayouthead *newfhlp)
5363 {
5364         struct nfsclflayout *flp, *nflp, *prevflp, *tflp;
5365
5366         flp = LIST_FIRST(fhlp);
5367         prevflp = NULL;
5368         LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) {
5369                 while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) {
5370                         prevflp = flp;
5371                         flp = LIST_NEXT(flp, nfsfl_list);
5372                 }
5373                 if (prevflp == NULL)
5374                         LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list);
5375                 else
5376                         LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list);
5377                 prevflp = nflp;
5378         }
5379 }
5380
5381 /*
5382  * Add this nfscldevinfo to the client, if it doesn't already exist.
5383  * This function consumes the structure pointed at by dip, if not NULL.
5384  */
5385 int
5386 nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip, int ind,
5387     struct nfsclflayout *flp)
5388 {
5389         struct nfsclclient *clp;
5390         struct nfscldevinfo *tdip;
5391         uint8_t *dev;
5392
5393         NFSLOCKCLSTATE();
5394         clp = nmp->nm_clp;
5395         if (clp == NULL) {
5396                 NFSUNLOCKCLSTATE();
5397                 if (dip != NULL)
5398                         free(dip, M_NFSDEVINFO);
5399                 return (ENODEV);
5400         }
5401         if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5402                 dev = flp->nfsfl_dev;
5403         else
5404                 dev = flp->nfsfl_ffm[ind].dev;
5405         tdip = nfscl_finddevinfo(clp, dev);
5406         if (tdip != NULL) {
5407                 tdip->nfsdi_layoutrefs++;
5408                 if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5409                         flp->nfsfl_devp = tdip;
5410                 else
5411                         flp->nfsfl_ffm[ind].devp = tdip;
5412                 nfscl_reldevinfo_locked(tdip);
5413                 NFSUNLOCKCLSTATE();
5414                 if (dip != NULL)
5415                         free(dip, M_NFSDEVINFO);
5416                 return (0);
5417         }
5418         if (dip != NULL) {
5419                 LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list);
5420                 dip->nfsdi_layoutrefs = 1;
5421                 if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5422                         flp->nfsfl_devp = dip;
5423                 else
5424                         flp->nfsfl_ffm[ind].devp = dip;
5425         }
5426         NFSUNLOCKCLSTATE();
5427         if (dip == NULL)
5428                 return (ENODEV);
5429         return (0);
5430 }
5431
5432 /*
5433  * Free up a layout structure and associated file layout structure(s).
5434  */
5435 void
5436 nfscl_freelayout(struct nfscllayout *layp)
5437 {
5438         struct nfsclflayout *flp, *nflp;
5439         struct nfsclrecalllayout *rp, *nrp;
5440
5441         LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) {
5442                 LIST_REMOVE(flp, nfsfl_list);
5443                 nfscl_freeflayout(flp);
5444         }
5445         LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) {
5446                 LIST_REMOVE(flp, nfsfl_list);
5447                 nfscl_freeflayout(flp);
5448         }
5449         LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) {
5450                 LIST_REMOVE(rp, nfsrecly_list);
5451                 free(rp, M_NFSLAYRECALL);
5452         }
5453         nfscl_layoutcnt--;
5454         free(layp, M_NFSLAYOUT);
5455 }
5456
5457 /*
5458  * Free up a file layout structure.
5459  */
5460 void
5461 nfscl_freeflayout(struct nfsclflayout *flp)
5462 {
5463         int i, j;
5464
5465         if ((flp->nfsfl_flags & NFSFL_FILE) != 0) {
5466                 for (i = 0; i < flp->nfsfl_fhcnt; i++)
5467                         free(flp->nfsfl_fh[i], M_NFSFH);
5468                 if (flp->nfsfl_devp != NULL)
5469                         flp->nfsfl_devp->nfsdi_layoutrefs--;
5470         }
5471         if ((flp->nfsfl_flags & NFSFL_FLEXFILE) != 0)
5472                 for (i = 0; i < flp->nfsfl_mirrorcnt; i++) {
5473                         for (j = 0; j < flp->nfsfl_ffm[i].fhcnt; j++)
5474                                 free(flp->nfsfl_ffm[i].fh[j], M_NFSFH);
5475                         if (flp->nfsfl_ffm[i].devp != NULL)     
5476                                 flp->nfsfl_ffm[i].devp->nfsdi_layoutrefs--;     
5477                 }
5478         free(flp, M_NFSFLAYOUT);
5479 }
5480
/*
 * Free up a file layout devinfo structure.
 * Only the structure itself is released (malloc type M_NFSDEVINFO); the
 * client's devinfo list and the structure's counters are not touched
 * here.
 */
void
nfscl_freedevinfo(struct nfscldevinfo *dip)
{

        free(dip, M_NFSDEVINFO);
}
5490
5491 /*
5492  * Mark any layouts that match as recalled.
5493  */
5494 static int
5495 nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode,
5496     uint64_t off, uint64_t len, uint32_t stateseqid, uint32_t stat, uint32_t op,
5497     char *devid, struct nfsclrecalllayout *recallp)
5498 {
5499         struct nfsclrecalllayout *rp, *orp;
5500
5501         recallp->nfsrecly_recalltype = recalltype;
5502         recallp->nfsrecly_iomode = iomode;
5503         recallp->nfsrecly_stateseqid = stateseqid;
5504         recallp->nfsrecly_off = off;
5505         recallp->nfsrecly_len = len;
5506         recallp->nfsrecly_stat = stat;
5507         recallp->nfsrecly_op = op;
5508         if (devid != NULL)
5509                 NFSBCOPY(devid, recallp->nfsrecly_devid, NFSX_V4DEVICEID);
5510         /*
5511          * Order the list as file returns first, followed by fsid and any
5512          * returns, both in increasing stateseqid order.
5513          * Note that the seqids wrap around, so 1 is after 0xffffffff.
5514          * (I'm not sure this is correct because I find RFC5661 confusing
5515          *  on this, but hopefully it will work ok.)
5516          */
5517         orp = NULL;
5518         LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5519                 orp = rp;
5520                 if ((recalltype == NFSLAYOUTRETURN_FILE &&
5521                      (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE ||
5522                       nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) ||
5523                     (recalltype != NFSLAYOUTRETURN_FILE &&
5524                      rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE &&
5525                      nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) {
5526                         LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list);
5527                         break;
5528                 }
5529
5530                 /*
5531                  * Put any error return on all the file returns that will
5532                  * preceed this one.
5533                  */
5534                 if (rp->nfsrecly_recalltype == NFSLAYOUTRETURN_FILE &&
5535                    stat != 0 && rp->nfsrecly_stat == 0) {
5536                         rp->nfsrecly_stat = stat;
5537                         rp->nfsrecly_op = op;
5538                         if (devid != NULL)
5539                                 NFSBCOPY(devid, rp->nfsrecly_devid,
5540                                     NFSX_V4DEVICEID);
5541                 }
5542         }
5543         if (rp == NULL) {
5544                 if (orp == NULL)
5545                         LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp,
5546                             nfsrecly_list);
5547                 else
5548                         LIST_INSERT_AFTER(orp, recallp, nfsrecly_list);
5549         }
5550         lyp->nfsly_flags |= NFSLY_RECALL;
5551         wakeup(lyp->nfsly_clp);
5552         return (0);
5553 }
5554
/*
 * Order two seqids, allowing for wrap around from 0xffffffff->0.
 * When the two values are 0x7fffffff or more apart, the numerically
 * smaller one is taken to have wrapped around and so to be the later one.
 * Return 1 if seqid1 comes before (or equals) seqid2, 0 otherwise.
 */
static int
nfscl_seq(uint32_t seqid1, uint32_t seqid2)
{
        const uint32_t wrapgap = 0x7fffffff;

        if (seqid1 == seqid2)
                return (1);
        if (seqid1 < seqid2) {
                /* Numerically earlier, unless seqid2 has wrapped around. */
                return ((seqid2 - seqid1) >= wrapgap ? 0 : 1);
        }
        /* Numerically later, unless seqid1 has wrapped around. */
        return ((seqid1 - seqid2) >= wrapgap ? 1 : 0);
}
5575
5576 /*
5577  * Do a layout return for each of the recalls.
5578  */
5579 static void
5580 nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp,
5581     struct ucred *cred, NFSPROC_T *p)
5582 {
5583         struct nfsclrecalllayout *rp;
5584         nfsv4stateid_t stateid;
5585         int layouttype;
5586
5587         NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER);
5588         stateid.seqid = lyp->nfsly_stateid.seqid;
5589         if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5590                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5591         else
5592                 layouttype = NFSLAYOUT_FLEXFILE;
5593         LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5594                 (void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh,
5595                     lyp->nfsly_fhlen, 0, layouttype,
5596                     rp->nfsrecly_iomode, rp->nfsrecly_recalltype,
5597                     rp->nfsrecly_off, rp->nfsrecly_len,
5598                     &stateid, cred, p, rp->nfsrecly_stat, rp->nfsrecly_op,
5599                     rp->nfsrecly_devid);
5600         }
5601 }
5602
5603 /*
5604  * Do the layout commit for a file layout.
5605  */
5606 static void
5607 nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp,
5608     struct ucred *cred, NFSPROC_T *p)
5609 {
5610         struct nfsclflayout *flp;
5611         uint64_t len;
5612         int error, layouttype;
5613
5614         if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5615                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5616         else
5617                 layouttype = NFSLAYOUT_FLEXFILE;
5618         LIST_FOREACH(flp, &lyp->nfsly_flayrw, nfsfl_list) {
5619                 if (layouttype == NFSLAYOUT_FLEXFILE &&
5620                     (flp->nfsfl_fflags & NFSFLEXFLAG_NO_LAYOUTCOMMIT) != 0) {
5621                         NFSCL_DEBUG(4, "Flex file: no layoutcommit\n");
5622                         /* If not supported, don't bother doing it. */
5623                         NFSLOCKMNT(nmp);
5624                         nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5625                         NFSUNLOCKMNT(nmp);
5626                         break;
5627                 } else if (flp->nfsfl_off <= lyp->nfsly_lastbyte) {
5628                         len = flp->nfsfl_end - flp->nfsfl_off;
5629                         error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh,
5630                             lyp->nfsly_fhlen, 0, flp->nfsfl_off, len,
5631                             lyp->nfsly_lastbyte, &lyp->nfsly_stateid,
5632                             layouttype, cred, p, NULL);
5633                         NFSCL_DEBUG(4, "layoutcommit err=%d\n", error);
5634                         if (error == NFSERR_NOTSUPP) {
5635                                 /* If not supported, don't bother doing it. */
5636                                 NFSLOCKMNT(nmp);
5637                                 nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5638                                 NFSUNLOCKMNT(nmp);
5639                                 break;
5640                         }
5641                 }
5642         }
5643 }
5644
5645 /*
5646  * Commit all layouts for a file (vnode).
5647  */
5648 int
5649 nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p)
5650 {
5651         struct nfsclclient *clp;
5652         struct nfscllayout *lyp;
5653         struct nfsnode *np = VTONFS(vp);
5654         mount_t mp;
5655         struct nfsmount *nmp;
5656
5657         mp = vp->v_mount;
5658         nmp = VFSTONFS(mp);
5659         if (NFSHASNOLAYOUTCOMMIT(nmp))
5660                 return (0);
5661         NFSLOCKCLSTATE();
5662         clp = nmp->nm_clp;
5663         if (clp == NULL) {
5664                 NFSUNLOCKCLSTATE();
5665                 return (EPERM);
5666         }
5667         lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
5668         if (lyp == NULL) {
5669                 NFSUNLOCKCLSTATE();
5670                 return (EPERM);
5671         }
5672         nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
5673         if (NFSCL_FORCEDISM(mp)) {
5674                 NFSUNLOCKCLSTATE();
5675                 return (EPERM);
5676         }
5677 tryagain:
5678         if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
5679                 lyp->nfsly_flags &= ~NFSLY_WRITTEN;
5680                 NFSUNLOCKCLSTATE();
5681                 NFSCL_DEBUG(4, "do layoutcommit2\n");
5682                 nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p);
5683                 NFSLOCKCLSTATE();
5684                 goto tryagain;
5685         }
5686         nfsv4_relref(&lyp->nfsly_lock);
5687         NFSUNLOCKCLSTATE();
5688         return (0);
5689 }