]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/nfsclient/nfs_clstate.c
nfscl: fix delegation recall when the file is not open
[FreeBSD/FreeBSD.git] / sys / fs / nfsclient / nfs_clstate.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2009 Rick Macklem, University of Guelph
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 /*
34  * These functions implement the client side state handling for NFSv4.
35  * NFSv4 state handling:
36  * - A lockowner is used to determine lock contention, so it
37  *   corresponds directly to a Posix pid. (1 to 1 mapping)
38  * - The correct granularity of an OpenOwner is not nearly so
39  *   obvious. An OpenOwner does the following:
40  *   - provides a serial sequencing of Open/Close/Lock-with-new-lockowner
41  *   - is used to check for Open/Share contention (not applicable to
42  *     this client, since all Opens are Deny_None)
43  *   As such, I considered both extremes.
44  *   1 OpenOwner per ClientID - Simple to manage, but fully serializes
45  *   all Open, Close and Lock (with a new lockowner) Ops.
46  *   1 OpenOwner for each Open - This one results in an OpenConfirm for
47  *   every Open, for most servers.
48  *   So, I chose to use the same mapping as I did for LockOwners.
49  *   The main concern here is that you can end up with multiple Opens
50  *   for the same File Handle, but on different OpenOwners (opens
51  *   inherited from parents, grandparents...) and you do not know
52  *   which of these the vnodeop close applies to. This is handled by
53  *   delaying the Close Op(s) until all of the Opens have been closed.
54  *   (It is not yet obvious if this is the correct granularity.)
55  * - How the code handles serialization:
56  *   - For the ClientId, it uses an exclusive lock while getting its
57  *     SetClientId and during recovery. Otherwise, it uses a shared
58  *     lock via a reference count.
59  *   - For the rest of the data structures, it uses an SMP mutex
60  *     (once the nfs client is SMP safe) and doesn't sleep while
61  *     manipulating the linked lists.
62  *   - The serialization of Open/Close/Lock/LockU falls out in the
63  *     "wash", since OpenOwners and LockOwners are both mapped from
64  *     Posix pid. In other words, there is only one Posix pid using
65  *     any given owner, so that owner is serialized. (If you change
66  *     the granularity of the OpenOwner, then code must be added to
67  *     serialize Ops on the OpenOwner.)
68  * - When to get rid of OpenOwners and LockOwners.
69  *   - The function nfscl_cleanup_common() is executed after a process exits.
70  *     It goes through the client list looking for all Open and Lock Owners.
71  *     When one is found, it is marked "defunct" or in the case of
72  *     an OpenOwner without any Opens, freed.
73  *     The renew thread scans for defunct Owners and gets rid of them,
74  *     if it can. The LockOwners will also be deleted when the
75  *     associated Open is closed.
76  *   - If the LockU or Close Op(s) fail during close in a way
77  *     that could be recovered upon retry, they are relinked to the
78  *     ClientId's defunct open list and retried by the renew thread
79  *     until they succeed or an unmount/recovery occurs.
80  *     (Since we are done with them, they do not need to be recovered.)
81  */
82
83 #include <fs/nfs/nfsport.h>
84
85 /*
86  * Global variables
87  */
88 extern struct nfsstatsv1 nfsstatsv1;
89 extern struct nfsreqhead nfsd_reqq;
90 extern u_int32_t newnfs_false, newnfs_true;
91 extern int nfscl_debuglevel;
92 extern int nfscl_enablecallb;
93 extern int nfs_numnfscbd;
94 NFSREQSPINLOCK;
95 NFSCLSTATEMUTEX;
96 int nfscl_inited = 0;
97 struct nfsclhead nfsclhead;     /* Head of clientid list */
98 int nfscl_deleghighwater = NFSCLDELEGHIGHWATER;
99 int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER;
100
101 static int nfscl_delegcnt = 0;
102 static int nfscl_layoutcnt = 0;
103 static int nfscl_getopen(struct nfsclownerhead *, u_int8_t *, int, u_int8_t *,
104     u_int8_t *, u_int32_t, struct nfscllockowner **, struct nfsclopen **);
105 static bool nfscl_checkown(struct nfsclowner *, struct nfsclopen *, uint8_t *,
106     uint8_t *, struct nfscllockowner **, struct nfsclopen **,
107     struct nfsclopen **);
108 static void nfscl_clrelease(struct nfsclclient *);
109 static void nfscl_cleanclient(struct nfsclclient *);
110 static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *,
111     struct ucred *, NFSPROC_T *);
112 static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *,
113     struct nfsmount *, struct ucred *, NFSPROC_T *);
114 static void nfscl_recover(struct nfsclclient *, bool *, struct ucred *,
115     NFSPROC_T *);
116 static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *,
117     struct nfscllock *, int);
118 static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **,
119     struct nfscllock **, int);
120 static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *);
121 static u_int32_t nfscl_nextcbident(void);
122 static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **);
123 static struct nfsclclient *nfscl_getclnt(u_int32_t);
124 static struct nfsclclient *nfscl_getclntsess(uint8_t *);
125 static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *,
126     int);
127 static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *,
128     int, struct nfsclrecalllayout **);
129 static void nfscl_reldevinfo_locked(struct nfscldevinfo *);
130 static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *,
131     int);
132 static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *);
133 static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *,
134     u_int8_t *, struct nfscllock **);
135 static void nfscl_freealllocks(struct nfscllockownerhead *, int);
136 static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int,
137     struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **);
138 static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *,
139     struct nfsclowner **, struct nfsclowner **, struct nfsclopen **,
140     struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *);
141 static int nfscl_moveopen(vnode_t , struct nfsclclient *,
142     struct nfsmount *, struct nfsclopen *, struct nfsclowner *,
143     struct nfscldeleg *, struct ucred *, NFSPROC_T *);
144 static void nfscl_totalrecall(struct nfsclclient *);
145 static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *,
146     struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *);
147 static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int,
148     u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int,
149     struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *);
150 static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *,
151     int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short,
152     struct ucred *, NFSPROC_T *);
153 static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t,
154     struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *);
155 static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *);
156 static int nfscl_errmap(struct nfsrv_descript *, u_int32_t);
157 static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *);
158 static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *,
159     struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int,
160     vnode_t *);
161 static void nfscl_freeopenowner(struct nfsclowner *, int);
162 static void nfscl_cleandeleg(struct nfscldeleg *);
163 static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *,
164     struct nfsmount *, NFSPROC_T *);
165 static void nfscl_emptylockowner(struct nfscllockowner *,
166     struct nfscllockownerfhhead *);
167 static void nfscl_mergeflayouts(struct nfsclflayouthead *,
168     struct nfsclflayouthead *);
169 static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t,
170     uint64_t, uint32_t, uint32_t, uint32_t, char *, struct nfsclrecalllayout *);
171 static int nfscl_seq(uint32_t, uint32_t);
172 static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *,
173     struct ucred *, NFSPROC_T *);
174 static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *,
175     struct ucred *, NFSPROC_T *);
176
177 static short nfscberr_null[] = {
178         0,
179         0,
180 };
181
182 static short nfscberr_getattr[] = {
183         NFSERR_RESOURCE,
184         NFSERR_BADHANDLE,
185         NFSERR_BADXDR,
186         NFSERR_RESOURCE,
187         NFSERR_SERVERFAULT,
188         0,
189 };
190
191 static short nfscberr_recall[] = {
192         NFSERR_RESOURCE,
193         NFSERR_BADHANDLE,
194         NFSERR_BADSTATEID,
195         NFSERR_BADXDR,
196         NFSERR_RESOURCE,
197         NFSERR_SERVERFAULT,
198         0,
199 };
200
201 static short *nfscl_cberrmap[] = {
202         nfscberr_null,
203         nfscberr_null,
204         nfscberr_null,
205         nfscberr_getattr,
206         nfscberr_recall
207 };
208
/*
 * Yields AF_INET6 when NFSCLFLAGS_AFINET6 is set in the client's
 * nfsc_flags, otherwise AF_INET.
 */
#define	NETFAMILY(clp)							\
	((((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) != 0) ? AF_INET6 : AF_INET)
211
212 /*
213  * Called for an open operation.
214  * If the nfhp argument is NULL, just get an openowner.
215  */
216 int
217 nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
218     struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp,
219     struct nfsclopen **opp, int *newonep, int *retp, int lockit)
220 {
221         struct nfsclclient *clp;
222         struct nfsclowner *owp, *nowp;
223         struct nfsclopen *op = NULL, *nop = NULL;
224         struct nfscldeleg *dp;
225         struct nfsclownerhead *ohp;
226         u_int8_t own[NFSV4CL_LOCKNAMELEN];
227         int ret;
228
229         if (newonep != NULL)
230                 *newonep = 0;
231         if (opp != NULL)
232                 *opp = NULL;
233         if (owpp != NULL)
234                 *owpp = NULL;
235
236         /*
237          * Might need one or both of these, so MALLOC them now, to
238          * avoid a tsleep() in MALLOC later.
239          */
240         nowp = malloc(sizeof (struct nfsclowner),
241             M_NFSCLOWNER, M_WAITOK);
242         if (nfhp != NULL)
243             nop = malloc(sizeof (struct nfsclopen) +
244                 fhlen - 1, M_NFSCLOPEN, M_WAITOK);
245         ret = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
246         if (ret != 0) {
247                 free(nowp, M_NFSCLOWNER);
248                 if (nop != NULL)
249                         free(nop, M_NFSCLOPEN);
250                 return (ret);
251         }
252
253         /*
254          * Get the Open iff it already exists.
255          * If none found, add the new one or return error, depending upon
256          * "create".
257          */
258         NFSLOCKCLSTATE();
259         dp = NULL;
260         /* First check the delegation list */
261         if (nfhp != NULL && usedeleg) {
262                 LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
263                         if (dp->nfsdl_fhlen == fhlen &&
264                             !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
265                                 if (!(amode & NFSV4OPEN_ACCESSWRITE) ||
266                                     (dp->nfsdl_flags & NFSCLDL_WRITE))
267                                         break;
268                                 dp = NULL;
269                                 break;
270                         }
271                 }
272         }
273
274         /* For NFSv4.1/4.2 and this option, use a single open_owner. */
275         if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
276                 nfscl_filllockowner(NULL, own, F_POSIX);
277         else
278                 nfscl_filllockowner(p->td_proc, own, F_POSIX);
279         if (dp != NULL)
280                 ohp = &dp->nfsdl_owner;
281         else
282                 ohp = &clp->nfsc_owner;
283         /* Now, search for an openowner */
284         LIST_FOREACH(owp, ohp, nfsow_list) {
285                 if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN))
286                         break;
287         }
288
289         /*
290          * Create a new open, as required.
291          */
292         nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen,
293             cred, newonep);
294
295         /*
296          * Now, check the mode on the open and return the appropriate
297          * value.
298          */
299         if (retp != NULL) {
300                 if (nfhp != NULL && dp != NULL && nop == NULL)
301                         /* new local open on delegation */
302                         *retp = NFSCLOPEN_SETCRED;
303                 else
304                         *retp = NFSCLOPEN_OK;
305         }
306         if (op != NULL && (amode & ~(op->nfso_mode))) {
307                 op->nfso_mode |= amode;
308                 if (retp != NULL && dp == NULL)
309                         *retp = NFSCLOPEN_DOOPEN;
310         }
311
312         /*
313          * Serialize modifications to the open owner for multiple threads
314          * within the same process using a read/write sleep lock.
315          * For NFSv4.1 and a single OpenOwner, allow concurrent open operations
316          * by acquiring a shared lock.  The close operations still use an
317          * exclusive lock for this case.
318          */
319         if (lockit != 0) {
320                 if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount))) {
321                         /*
322                          * Get a shared lock on the OpenOwner, but first
323                          * wait for any pending exclusive lock, so that the
324                          * exclusive locker gets priority.
325                          */
326                         nfsv4_lock(&owp->nfsow_rwlock, 0, NULL,
327                             NFSCLSTATEMUTEXPTR, NULL);
328                         nfsv4_getref(&owp->nfsow_rwlock, NULL,
329                             NFSCLSTATEMUTEXPTR, NULL);
330                 } else
331                         nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
332         }
333         NFSUNLOCKCLSTATE();
334         if (nowp != NULL)
335                 free(nowp, M_NFSCLOWNER);
336         if (nop != NULL)
337                 free(nop, M_NFSCLOPEN);
338         if (owpp != NULL)
339                 *owpp = owp;
340         if (opp != NULL)
341                 *opp = op;
342         return (0);
343 }
344
345 /*
346  * Create a new open, as required.
347  */
348 static void
349 nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp,
350     struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp,
351     struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen,
352     struct ucred *cred, int *newonep)
353 {
354         struct nfsclowner *owp = *owpp, *nowp;
355         struct nfsclopen *op, *nop;
356
357         if (nowpp != NULL)
358                 nowp = *nowpp;
359         else
360                 nowp = NULL;
361         if (nopp != NULL)
362                 nop = *nopp;
363         else
364                 nop = NULL;
365         if (owp == NULL && nowp != NULL) {
366                 NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
367                 LIST_INIT(&nowp->nfsow_open);
368                 nowp->nfsow_clp = clp;
369                 nowp->nfsow_seqid = 0;
370                 nowp->nfsow_defunct = 0;
371                 nfscl_lockinit(&nowp->nfsow_rwlock);
372                 if (dp != NULL) {
373                         nfsstatsv1.cllocalopenowners++;
374                         LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list);
375                 } else {
376                         nfsstatsv1.clopenowners++;
377                         LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list);
378                 }
379                 owp = *owpp = nowp;
380                 *nowpp = NULL;
381                 if (newonep != NULL)
382                         *newonep = 1;
383         }
384
385          /* If an fhp has been specified, create an Open as well. */
386         if (fhp != NULL) {
387                 /* and look for the correct open, based upon FH */
388                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
389                         if (op->nfso_fhlen == fhlen &&
390                             !NFSBCMP(op->nfso_fh, fhp, fhlen))
391                                 break;
392                 }
393                 if (op == NULL && nop != NULL) {
394                         nop->nfso_own = owp;
395                         nop->nfso_mode = 0;
396                         nop->nfso_opencnt = 0;
397                         nop->nfso_posixlock = 1;
398                         nop->nfso_fhlen = fhlen;
399                         NFSBCOPY(fhp, nop->nfso_fh, fhlen);
400                         LIST_INIT(&nop->nfso_lock);
401                         nop->nfso_stateid.seqid = 0;
402                         nop->nfso_stateid.other[0] = 0;
403                         nop->nfso_stateid.other[1] = 0;
404                         nop->nfso_stateid.other[2] = 0;
405                         KASSERT(cred != NULL, ("%s: cred NULL\n", __func__));
406                         newnfs_copyincred(cred, &nop->nfso_cred);
407                         if (dp != NULL) {
408                                 TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
409                                 TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
410                                     nfsdl_list);
411                                 dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
412                                 nfsstatsv1.cllocalopens++;
413                         } else {
414                                 nfsstatsv1.clopens++;
415                         }
416                         LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list);
417                         *opp = nop;
418                         *nopp = NULL;
419                         if (newonep != NULL)
420                                 *newonep = 1;
421                 } else {
422                         *opp = op;
423                 }
424         }
425 }
426
427 /*
428  * Called to find/add a delegation to a client.
429  */
430 int
431 nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
432     int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp)
433 {
434         struct nfscldeleg *dp = *dpp, *tdp;
435
436         /*
437          * First, if we have received a Read delegation for a file on a
438          * read/write file system, just return it, because they aren't
439          * useful, imho.
440          */
441         if (mp != NULL && dp != NULL && !NFSMNT_RDONLY(mp) &&
442             (dp->nfsdl_flags & NFSCLDL_READ)) {
443                 (void) nfscl_trydelegreturn(dp, cred, VFSTONFS(mp), p);
444                 free(dp, M_NFSCLDELEG);
445                 *dpp = NULL;
446                 return (0);
447         }
448
449         /* Look for the correct deleg, based upon FH */
450         NFSLOCKCLSTATE();
451         tdp = nfscl_finddeleg(clp, nfhp, fhlen);
452         if (tdp == NULL) {
453                 if (dp == NULL) {
454                         NFSUNLOCKCLSTATE();
455                         return (NFSERR_BADSTATEID);
456                 }
457                 *dpp = NULL;
458                 TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
459                 LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp,
460                     nfsdl_hash);
461                 dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
462                 nfsstatsv1.cldelegates++;
463                 nfscl_delegcnt++;
464         } else {
465                 /*
466                  * Delegation already exists, what do we do if a new one??
467                  */
468                 if (dp != NULL) {
469                         printf("Deleg already exists!\n");
470                         free(dp, M_NFSCLDELEG);
471                         *dpp = NULL;
472                 } else {
473                         *dpp = tdp;
474                 }
475         }
476         NFSUNLOCKCLSTATE();
477         return (0);
478 }
479
480 /*
481  * Find a delegation for this file handle. Return NULL upon failure.
482  */
483 static struct nfscldeleg *
484 nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
485 {
486         struct nfscldeleg *dp;
487
488         LIST_FOREACH(dp, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) {
489             if (dp->nfsdl_fhlen == fhlen &&
490                 !NFSBCMP(dp->nfsdl_fh, fhp, fhlen))
491                 break;
492         }
493         return (dp);
494 }
495
496 /*
497  * Get a stateid for an I/O operation. First, look for an open and iff
498  * found, return either a lockowner stateid or the open stateid.
499  * If no Open is found, just return error and the special stateid of all zeros.
500  */
501 int
502 nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
503     int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp,
504     void **lckpp)
505 {
506         struct nfsclclient *clp;
507         struct nfsclowner *owp;
508         struct nfsclopen *op = NULL, *top;
509         struct nfscllockowner *lp;
510         struct nfscldeleg *dp;
511         struct nfsnode *np;
512         struct nfsmount *nmp;
513         u_int8_t own[NFSV4CL_LOCKNAMELEN];
514         int error;
515         bool done;
516
517         *lckpp = NULL;
518         /*
519          * Initially, just set the special stateid of all zeros.
520          * (Don't do this for a DS, since the special stateid can't be used.)
521          */
522         if (fords == 0) {
523                 stateidp->seqid = 0;
524                 stateidp->other[0] = 0;
525                 stateidp->other[1] = 0;
526                 stateidp->other[2] = 0;
527         }
528         if (vnode_vtype(vp) != VREG)
529                 return (EISDIR);
530         np = VTONFS(vp);
531         nmp = VFSTONFS(vp->v_mount);
532         NFSLOCKCLSTATE();
533         clp = nfscl_findcl(nmp);
534         if (clp == NULL) {
535                 NFSUNLOCKCLSTATE();
536                 return (EACCES);
537         }
538
539         /*
540          * Wait for recovery to complete.
541          */
542         while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG))
543                 (void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR,
544                     PZERO, "nfsrecvr", NULL);
545
546         /*
547          * First, look for a delegation.
548          */
549         LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
550                 if (dp->nfsdl_fhlen == fhlen &&
551                     !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
552                         if (!(mode & NFSV4OPEN_ACCESSWRITE) ||
553                             (dp->nfsdl_flags & NFSCLDL_WRITE)) {
554                                 stateidp->seqid = dp->nfsdl_stateid.seqid;
555                                 stateidp->other[0] = dp->nfsdl_stateid.other[0];
556                                 stateidp->other[1] = dp->nfsdl_stateid.other[1];
557                                 stateidp->other[2] = dp->nfsdl_stateid.other[2];
558                                 if (!(np->n_flag & NDELEGRECALL)) {
559                                         TAILQ_REMOVE(&clp->nfsc_deleg, dp,
560                                             nfsdl_list);
561                                         TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
562                                             nfsdl_list);
563                                         dp->nfsdl_timestamp = NFSD_MONOSEC +
564                                             120;
565                                         dp->nfsdl_rwlock.nfslock_usecnt++;
566                                         *lckpp = (void *)&dp->nfsdl_rwlock;
567                                 }
568                                 NFSUNLOCKCLSTATE();
569                                 return (0);
570                         }
571                         break;
572                 }
573         }
574
575         if (p != NULL) {
576                 /*
577                  * If p != NULL, we want to search the parentage tree
578                  * for a matching OpenOwner and use that.
579                  */
580                 if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
581                         nfscl_filllockowner(NULL, own, F_POSIX);
582                 else
583                         nfscl_filllockowner(p->td_proc, own, F_POSIX);
584                 lp = NULL;
585                 error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, own, own,
586                     mode, &lp, &op);
587                 if (error == 0 && lp != NULL && fords == 0) {
588                         /* Don't return a lock stateid for a DS. */
589                         stateidp->seqid =
590                             lp->nfsl_stateid.seqid;
591                         stateidp->other[0] =
592                             lp->nfsl_stateid.other[0];
593                         stateidp->other[1] =
594                             lp->nfsl_stateid.other[1];
595                         stateidp->other[2] =
596                             lp->nfsl_stateid.other[2];
597                         NFSUNLOCKCLSTATE();
598                         return (0);
599                 }
600         }
601         if (op == NULL) {
602                 /* If not found, just look for any OpenOwner that will work. */
603                 top = NULL;
604                 done = false;
605                 LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
606                         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
607                                 if (op->nfso_fhlen == fhlen &&
608                                     !NFSBCMP(op->nfso_fh, nfhp, fhlen)) {
609                                         if (top == NULL && (op->nfso_mode &
610                                             NFSV4OPEN_ACCESSWRITE) != 0 &&
611                                             (mode & NFSV4OPEN_ACCESSREAD) != 0)
612                                                 top = op;
613                                         if ((mode & op->nfso_mode) == mode) {
614                                                 done = true;
615                                                 break;
616                                         }
617                                 }
618                         }
619                         if (done)
620                                 break;
621                 }
622                 if (!done) {
623                         NFSCL_DEBUG(2, "openmode top=%p\n", top);
624                         if (top == NULL || NFSHASOPENMODE(nmp)) {
625                                 NFSUNLOCKCLSTATE();
626                                 return (ENOENT);
627                         } else
628                                 op = top;
629                 }
630                 /*
631                  * For read aheads or write behinds, use the open cred.
632                  * A read ahead or write behind is indicated by p == NULL.
633                  */
634                 if (p == NULL)
635                         newnfs_copycred(&op->nfso_cred, cred);
636         }
637
638         /*
639          * No lock stateid, so return the open stateid.
640          */
641         stateidp->seqid = op->nfso_stateid.seqid;
642         stateidp->other[0] = op->nfso_stateid.other[0];
643         stateidp->other[1] = op->nfso_stateid.other[1];
644         stateidp->other[2] = op->nfso_stateid.other[2];
645         NFSUNLOCKCLSTATE();
646         return (0);
647 }
648
649 /*
650  * Search for a matching file, mode and, optionally, lockowner.
651  */
652 static int
653 nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen,
654     u_int8_t *openown, u_int8_t *lockown, u_int32_t mode,
655     struct nfscllockowner **lpp, struct nfsclopen **opp)
656 {
657         struct nfsclowner *owp;
658         struct nfsclopen *op, *rop, *rop2;
659         bool keep_looping;
660
661         if (lpp != NULL)
662                 *lpp = NULL;
663         /*
664          * rop will be set to the open to be returned. There are three
665          * variants of this, all for an open of the correct file:
666          * 1 - A match of lockown.
667          * 2 - A match of the openown, when no lockown match exists.
668          * 3 - A match for any open, if no openown or lockown match exists.
669          * Looking for #2 over #3 probably isn't necessary, but since
670          * RFC3530 is vague w.r.t. the relationship between openowners and
671          * lockowners, I think this is the safer way to go.
672          */
673         rop = NULL;
674         rop2 = NULL;
675         keep_looping = true;
676         /* Search the client list */
677         LIST_FOREACH(owp, ohp, nfsow_list) {
678                 /* and look for the correct open */
679                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
680                         if (op->nfso_fhlen == fhlen &&
681                             !NFSBCMP(op->nfso_fh, nfhp, fhlen)
682                             && (op->nfso_mode & mode) == mode)
683                                 keep_looping = nfscl_checkown(owp, op, openown,
684                                     lockown, lpp, &rop, &rop2);
685                         if (!keep_looping)
686                                 break;
687                 }
688                 if (!keep_looping)
689                         break;
690         }
691         if (rop == NULL)
692                 rop = rop2;
693         if (rop == NULL)
694                 return (EBADF);
695         *opp = rop;
696         return (0);
697 }
698
699 /* Check for an owner match. */
700 static bool
701 nfscl_checkown(struct nfsclowner *owp, struct nfsclopen *op, uint8_t *openown,
702     uint8_t *lockown, struct nfscllockowner **lpp, struct nfsclopen **ropp,
703     struct nfsclopen **ropp2)
704 {
705         struct nfscllockowner *lp;
706         bool keep_looping;
707
708         keep_looping = true;
709         if (lpp != NULL) {
710                 /* Now look for a matching lockowner. */
711                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
712                         if (!NFSBCMP(lp->nfsl_owner, lockown,
713                             NFSV4CL_LOCKNAMELEN)) {
714                                 *lpp = lp;
715                                 *ropp = op;
716                                 return (false);
717                         }
718                 }
719         }
720         if (*ropp == NULL && !NFSBCMP(owp->nfsow_owner, openown,
721             NFSV4CL_LOCKNAMELEN)) {
722                 *ropp = op;
723                 if (lpp == NULL)
724                         keep_looping = false;
725         }
726         if (*ropp2 == NULL)
727                 *ropp2 = op;
728         return (keep_looping);
729 }
730
731 /*
732  * Release use of an open owner. Called when open operations are done
733  * with the open owner.
734  */
735 void
736 nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp,
737     __unused int error, __unused int candelete, int unlocked)
738 {
739
740         if (owp == NULL)
741                 return;
742         NFSLOCKCLSTATE();
743         if (unlocked == 0) {
744                 if (NFSHASONEOPENOWN(nmp))
745                         nfsv4_relref(&owp->nfsow_rwlock);
746                 else
747                         nfscl_lockunlock(&owp->nfsow_rwlock);
748         }
749         nfscl_clrelease(owp->nfsow_clp);
750         NFSUNLOCKCLSTATE();
751 }
752
753 /*
754  * Release use of an open structure under an open owner.
755  */
756 void
757 nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
758     int candelete)
759 {
760         struct nfsclclient *clp;
761         struct nfsclowner *owp;
762
763         if (op == NULL)
764                 return;
765         NFSLOCKCLSTATE();
766         owp = op->nfso_own;
767         if (NFSHASONEOPENOWN(nmp))
768                 nfsv4_relref(&owp->nfsow_rwlock);
769         else
770                 nfscl_lockunlock(&owp->nfsow_rwlock);
771         clp = owp->nfsow_clp;
772         if (error && candelete && op->nfso_opencnt == 0)
773                 nfscl_freeopen(op, 0);
774         nfscl_clrelease(clp);
775         NFSUNLOCKCLSTATE();
776 }
777
778 /*
779  * Called to get a clientid structure. It will optionally lock the
780  * client data structures to do the SetClientId/SetClientId_confirm,
781  * but will release that lock and return the clientid with a reference
782  * count on it.
783  * If the "cred" argument is NULL, a new clientid should not be created.
784  * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot
785  * be done.
786  * The start_renewthread argument tells nfscl_getcl() to start a renew
787  * thread if this creates a new clp.
788  * It always clpp with a reference count on it, unless returning an error.
789  */
790 int
791 nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
792     int start_renewthread, struct nfsclclient **clpp)
793 {
794         struct nfsclclient *clp;
795         struct nfsclclient *newclp = NULL;
796         struct nfsmount *nmp;
797         char uuid[HOSTUUIDLEN];
798         int igotlock = 0, error, trystalecnt, clidinusedelay, i;
799         u_int16_t idlen = 0;
800
801         nmp = VFSTONFS(mp);
802         if (cred != NULL) {
803                 getcredhostuuid(cred, uuid, sizeof uuid);
804                 idlen = strlen(uuid);
805                 if (idlen > 0)
806                         idlen += sizeof (u_int64_t);
807                 else
808                         idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */
809                 newclp = malloc(
810                     sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT,
811                     M_WAITOK | M_ZERO);
812         }
813         NFSLOCKCLSTATE();
814         /*
815          * If a forced dismount is already in progress, don't
816          * allocate a new clientid and get out now. For the case where
817          * clp != NULL, this is a harmless optimization.
818          */
819         if (NFSCL_FORCEDISM(mp)) {
820                 NFSUNLOCKCLSTATE();
821                 if (newclp != NULL)
822                         free(newclp, M_NFSCLCLIENT);
823                 return (EBADF);
824         }
825         clp = nmp->nm_clp;
826         if (clp == NULL) {
827                 if (newclp == NULL) {
828                         NFSUNLOCKCLSTATE();
829                         return (EACCES);
830                 }
831                 clp = newclp;
832                 clp->nfsc_idlen = idlen;
833                 LIST_INIT(&clp->nfsc_owner);
834                 TAILQ_INIT(&clp->nfsc_deleg);
835                 TAILQ_INIT(&clp->nfsc_layout);
836                 LIST_INIT(&clp->nfsc_devinfo);
837                 for (i = 0; i < NFSCLDELEGHASHSIZE; i++)
838                         LIST_INIT(&clp->nfsc_deleghash[i]);
839                 for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
840                         LIST_INIT(&clp->nfsc_layouthash[i]);
841                 clp->nfsc_flags = NFSCLFLAGS_INITED;
842                 clp->nfsc_clientidrev = 1;
843                 clp->nfsc_cbident = nfscl_nextcbident();
844                 nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id,
845                     clp->nfsc_idlen);
846                 LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list);
847                 nmp->nm_clp = clp;
848                 clp->nfsc_nmp = nmp;
849                 NFSUNLOCKCLSTATE();
850                 if (start_renewthread != 0)
851                         nfscl_start_renewthread(clp);
852         } else {
853                 NFSUNLOCKCLSTATE();
854                 if (newclp != NULL)
855                         free(newclp, M_NFSCLCLIENT);
856         }
857         NFSLOCKCLSTATE();
858         while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock &&
859             !NFSCL_FORCEDISM(mp))
860                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
861                     NFSCLSTATEMUTEXPTR, mp);
862         if (igotlock == 0) {
863                 /*
864                  * Call nfsv4_lock() with "iwantlock == 0" so that it will
865                  * wait for a pending exclusive lock request.  This gives the
866                  * exclusive lock request priority over this shared lock
867                  * request.
868                  * An exclusive lock on nfsc_lock is used mainly for server
869                  * crash recoveries.
870                  */
871                 nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR, mp);
872                 nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
873         }
874         if (igotlock == 0 && NFSCL_FORCEDISM(mp)) {
875                 /*
876                  * Both nfsv4_lock() and nfsv4_getref() know to check
877                  * for NFSCL_FORCEDISM() and return without sleeping to
878                  * wait for the exclusive lock to be released, since it
879                  * might be held by nfscl_umount() and we need to get out
880                  * now for that case and not wait until nfscl_umount()
881                  * releases it.
882                  */
883                 NFSUNLOCKCLSTATE();
884                 return (EBADF);
885         }
886         NFSUNLOCKCLSTATE();
887
888         /*
889          * If it needs a clientid, do the setclientid now.
890          */
891         if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) {
892                 if (!igotlock)
893                         panic("nfscl_clget");
894                 if (p == NULL || cred == NULL) {
895                         NFSLOCKCLSTATE();
896                         nfsv4_unlock(&clp->nfsc_lock, 0);
897                         NFSUNLOCKCLSTATE();
898                         return (EACCES);
899                 }
900                 /*
901                  * If RFC3530 Sec. 14.2.33 is taken literally,
902                  * NFSERR_CLIDINUSE will be returned persistently for the
903                  * case where a new mount of the same file system is using
904                  * a different principal. In practice, NFSERR_CLIDINUSE is
905                  * only returned when there is outstanding unexpired state
906                  * on the clientid. As such, try for twice the lease
907                  * interval, if we know what that is. Otherwise, make a
908                  * wild ass guess.
909                  * The case of returning NFSERR_STALECLIENTID is far less
910                  * likely, but might occur if there is a significant delay
911                  * between doing the SetClientID and SetClientIDConfirm Ops,
912                  * such that the server throws away the clientid before
913                  * receiving the SetClientIDConfirm.
914                  */
915                 if (clp->nfsc_renew > 0)
916                         clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2;
917                 else
918                         clidinusedelay = 120;
919                 trystalecnt = 3;
920                 do {
921                         error = nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
922                         if (error == NFSERR_STALECLIENTID ||
923                             error == NFSERR_STALEDONTRECOVER ||
924                             error == NFSERR_BADSESSION ||
925                             error == NFSERR_CLIDINUSE) {
926                                 (void) nfs_catnap(PZERO, error, "nfs_setcl");
927                         }
928                 } while (((error == NFSERR_STALECLIENTID ||
929                      error == NFSERR_BADSESSION ||
930                      error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) ||
931                     (error == NFSERR_CLIDINUSE && --clidinusedelay > 0));
932                 if (error) {
933                         NFSLOCKCLSTATE();
934                         nfsv4_unlock(&clp->nfsc_lock, 0);
935                         NFSUNLOCKCLSTATE();
936                         return (error);
937                 }
938                 clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
939         }
940         if (igotlock) {
941                 NFSLOCKCLSTATE();
942                 nfsv4_unlock(&clp->nfsc_lock, 1);
943                 NFSUNLOCKCLSTATE();
944         }
945
946         *clpp = clp;
947         return (0);
948 }
949
950 /*
951  * Get a reference to a clientid and return it, if valid.
952  */
953 struct nfsclclient *
954 nfscl_findcl(struct nfsmount *nmp)
955 {
956         struct nfsclclient *clp;
957
958         clp = nmp->nm_clp;
959         if (clp == NULL || !(clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID))
960                 return (NULL);
961         return (clp);
962 }
963
964 /*
965  * Release the clientid structure. It may be locked or reference counted.
966  */
967 static void
968 nfscl_clrelease(struct nfsclclient *clp)
969 {
970
971         if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
972                 nfsv4_unlock(&clp->nfsc_lock, 0);
973         else
974                 nfsv4_relref(&clp->nfsc_lock);
975 }
976
977 /*
978  * External call for nfscl_clrelease.
979  */
980 void
981 nfscl_clientrelease(struct nfsclclient *clp)
982 {
983
984         NFSLOCKCLSTATE();
985         if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
986                 nfsv4_unlock(&clp->nfsc_lock, 0);
987         else
988                 nfsv4_relref(&clp->nfsc_lock);
989         NFSUNLOCKCLSTATE();
990 }
991
992 /*
993  * Called when wanting to lock a byte region.
994  */
995 int
996 nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
997     short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp,
998     int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp,
999     struct nfscllockowner **lpp, int *newonep, int *donelocallyp)
1000 {
1001         struct nfscllockowner *lp;
1002         struct nfsclopen *op;
1003         struct nfsclclient *clp;
1004         struct nfscllockowner *nlp;
1005         struct nfscllock *nlop, *otherlop;
1006         struct nfscldeleg *dp = NULL, *ldp = NULL;
1007         struct nfscllockownerhead *lhp = NULL;
1008         struct nfsnode *np;
1009         u_int8_t own[NFSV4CL_LOCKNAMELEN], *ownp, openown[NFSV4CL_LOCKNAMELEN];
1010         u_int8_t *openownp;
1011         int error = 0, ret, donelocally = 0;
1012         u_int32_t mode;
1013
1014         /* For Lock Ops, the open mode doesn't matter, so use 0 to match any. */
1015         mode = 0;
1016         np = VTONFS(vp);
1017         *lpp = NULL;
1018         lp = NULL;
1019         *newonep = 0;
1020         *donelocallyp = 0;
1021
1022         /*
1023          * Might need these, so MALLOC them now, to
1024          * avoid a tsleep() in MALLOC later.
1025          */
1026         nlp = malloc(
1027             sizeof (struct nfscllockowner), M_NFSCLLOCKOWNER, M_WAITOK);
1028         otherlop = malloc(
1029             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1030         nlop = malloc(
1031             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1032         nlop->nfslo_type = type;
1033         nlop->nfslo_first = off;
1034         if (len == NFS64BITSSET) {
1035                 nlop->nfslo_end = NFS64BITSSET;
1036         } else {
1037                 nlop->nfslo_end = off + len;
1038                 if (nlop->nfslo_end <= nlop->nfslo_first)
1039                         error = NFSERR_INVAL;
1040         }
1041
1042         if (!error) {
1043                 if (recovery)
1044                         clp = rclp;
1045                 else
1046                         error = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
1047         }
1048         if (error) {
1049                 free(nlp, M_NFSCLLOCKOWNER);
1050                 free(otherlop, M_NFSCLLOCK);
1051                 free(nlop, M_NFSCLLOCK);
1052                 return (error);
1053         }
1054
1055         op = NULL;
1056         if (recovery) {
1057                 ownp = rownp;
1058                 openownp = ropenownp;
1059         } else {
1060                 nfscl_filllockowner(id, own, flags);
1061                 ownp = own;
1062                 if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
1063                         nfscl_filllockowner(NULL, openown, F_POSIX);
1064                 else
1065                         nfscl_filllockowner(p->td_proc, openown, F_POSIX);
1066                 openownp = openown;
1067         }
1068         if (!recovery) {
1069                 NFSLOCKCLSTATE();
1070                 /*
1071                  * First, search for a delegation. If one exists for this file,
1072                  * the lock can be done locally against it, so long as there
1073                  * isn't a local lock conflict.
1074                  */
1075                 ldp = dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1076                     np->n_fhp->nfh_len);
1077                 /* Just sanity check for correct type of delegation */
1078                 if (dp != NULL && ((dp->nfsdl_flags &
1079                     (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) != 0 ||
1080                      (type == F_WRLCK &&
1081                       (dp->nfsdl_flags & NFSCLDL_WRITE) == 0)))
1082                         dp = NULL;
1083         }
1084         if (dp != NULL) {
1085                 /* Now, find an open and maybe a lockowner. */
1086                 ret = nfscl_getopen(&dp->nfsdl_owner, np->n_fhp->nfh_fh,
1087                     np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op);
1088                 if (ret)
1089                         ret = nfscl_getopen(&clp->nfsc_owner,
1090                             np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1091                             ownp, mode, NULL, &op);
1092                 if (!ret) {
1093                         lhp = &dp->nfsdl_lock;
1094                         TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
1095                         TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
1096                         dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
1097                         donelocally = 1;
1098                 } else {
1099                         dp = NULL;
1100                 }
1101         }
1102         if (!donelocally) {
1103                 /*
1104                  * Get the related Open and maybe lockowner.
1105                  */
1106                 error = nfscl_getopen(&clp->nfsc_owner,
1107                     np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp,
1108                     ownp, mode, &lp, &op);
1109                 if (!error)
1110                         lhp = &op->nfso_lock;
1111         }
1112         if (!error && !recovery)
1113                 error = nfscl_localconflict(clp, np->n_fhp->nfh_fh,
1114                     np->n_fhp->nfh_len, nlop, ownp, ldp, NULL);
1115         if (error) {
1116                 if (!recovery) {
1117                         nfscl_clrelease(clp);
1118                         NFSUNLOCKCLSTATE();
1119                 }
1120                 free(nlp, M_NFSCLLOCKOWNER);
1121                 free(otherlop, M_NFSCLLOCK);
1122                 free(nlop, M_NFSCLLOCK);
1123                 return (error);
1124         }
1125
1126         /*
1127          * Ok, see if a lockowner exists and create one, as required.
1128          */
1129         if (lp == NULL)
1130                 LIST_FOREACH(lp, lhp, nfsl_list) {
1131                         if (!NFSBCMP(lp->nfsl_owner, ownp, NFSV4CL_LOCKNAMELEN))
1132                                 break;
1133                 }
1134         if (lp == NULL) {
1135                 NFSBCOPY(ownp, nlp->nfsl_owner, NFSV4CL_LOCKNAMELEN);
1136                 if (recovery)
1137                         NFSBCOPY(ropenownp, nlp->nfsl_openowner,
1138                             NFSV4CL_LOCKNAMELEN);
1139                 else
1140                         NFSBCOPY(op->nfso_own->nfsow_owner, nlp->nfsl_openowner,
1141                             NFSV4CL_LOCKNAMELEN);
1142                 nlp->nfsl_seqid = 0;
1143                 nlp->nfsl_lockflags = flags;
1144                 nlp->nfsl_inprog = NULL;
1145                 nfscl_lockinit(&nlp->nfsl_rwlock);
1146                 LIST_INIT(&nlp->nfsl_lock);
1147                 if (donelocally) {
1148                         nlp->nfsl_open = NULL;
1149                         nfsstatsv1.cllocallockowners++;
1150                 } else {
1151                         nlp->nfsl_open = op;
1152                         nfsstatsv1.cllockowners++;
1153                 }
1154                 LIST_INSERT_HEAD(lhp, nlp, nfsl_list);
1155                 lp = nlp;
1156                 nlp = NULL;
1157                 *newonep = 1;
1158         }
1159
1160         /*
1161          * Now, update the byte ranges for locks.
1162          */
1163         ret = nfscl_updatelock(lp, &nlop, &otherlop, donelocally);
1164         if (!ret)
1165                 donelocally = 1;
1166         if (donelocally) {
1167                 *donelocallyp = 1;
1168                 if (!recovery)
1169                         nfscl_clrelease(clp);
1170         } else {
1171                 /*
1172                  * Serial modifications on the lock owner for multiple threads
1173                  * for the same process using a read/write lock.
1174                  */
1175                 if (!recovery)
1176                         nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1177         }
1178         if (!recovery)
1179                 NFSUNLOCKCLSTATE();
1180
1181         if (nlp)
1182                 free(nlp, M_NFSCLLOCKOWNER);
1183         if (nlop)
1184                 free(nlop, M_NFSCLLOCK);
1185         if (otherlop)
1186                 free(otherlop, M_NFSCLLOCK);
1187
1188         *lpp = lp;
1189         return (0);
1190 }
1191
1192 /*
1193  * Called to unlock a byte range, for LockU.
1194  */
1195 int
1196 nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
1197     __unused struct ucred *cred, NFSPROC_T *p, int callcnt,
1198     struct nfsclclient *clp, void *id, int flags,
1199     struct nfscllockowner **lpp, int *dorpcp)
1200 {
1201         struct nfscllockowner *lp;
1202         struct nfsclowner *owp;
1203         struct nfsclopen *op;
1204         struct nfscllock *nlop, *other_lop = NULL;
1205         struct nfscldeleg *dp;
1206         struct nfsnode *np;
1207         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1208         int ret = 0, fnd;
1209
1210         np = VTONFS(vp);
1211         *lpp = NULL;
1212         *dorpcp = 0;
1213
1214         /*
1215          * Might need these, so MALLOC them now, to
1216          * avoid a tsleep() in MALLOC later.
1217          */
1218         nlop = malloc(
1219             sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1220         nlop->nfslo_type = F_UNLCK;
1221         nlop->nfslo_first = off;
1222         if (len == NFS64BITSSET) {
1223                 nlop->nfslo_end = NFS64BITSSET;
1224         } else {
1225                 nlop->nfslo_end = off + len;
1226                 if (nlop->nfslo_end <= nlop->nfslo_first) {
1227                         free(nlop, M_NFSCLLOCK);
1228                         return (NFSERR_INVAL);
1229                 }
1230         }
1231         if (callcnt == 0) {
1232                 other_lop = malloc(
1233                     sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
1234                 *other_lop = *nlop;
1235         }
1236         nfscl_filllockowner(id, own, flags);
1237         dp = NULL;
1238         NFSLOCKCLSTATE();
1239         if (callcnt == 0)
1240                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
1241                     np->n_fhp->nfh_len);
1242
1243         /*
1244          * First, unlock any local regions on a delegation.
1245          */
1246         if (dp != NULL) {
1247                 /* Look for this lockowner. */
1248                 LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1249                         if (!NFSBCMP(lp->nfsl_owner, own,
1250                             NFSV4CL_LOCKNAMELEN))
1251                                 break;
1252                 }
1253                 if (lp != NULL)
1254                         /* Use other_lop, so nlop is still available */
1255                         (void)nfscl_updatelock(lp, &other_lop, NULL, 1);
1256         }
1257
1258         /*
1259          * Now, find a matching open/lockowner that hasn't already been done,
1260          * as marked by nfsl_inprog.
1261          */
1262         lp = NULL;
1263         fnd = 0;
1264         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1265             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1266                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1267                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1268                     LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1269                         if (lp->nfsl_inprog == NULL &&
1270                             !NFSBCMP(lp->nfsl_owner, own,
1271                              NFSV4CL_LOCKNAMELEN)) {
1272                                 fnd = 1;
1273                                 break;
1274                         }
1275                     }
1276                     if (fnd)
1277                         break;
1278                 }
1279             }
1280             if (fnd)
1281                 break;
1282         }
1283
1284         if (lp != NULL) {
1285                 ret = nfscl_updatelock(lp, &nlop, NULL, 0);
1286                 if (ret)
1287                         *dorpcp = 1;
1288                 /*
1289                  * Serial modifications on the lock owner for multiple
1290                  * threads for the same process using a read/write lock.
1291                  */
1292                 lp->nfsl_inprog = p;
1293                 nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR);
1294                 *lpp = lp;
1295         }
1296         NFSUNLOCKCLSTATE();
1297         if (nlop)
1298                 free(nlop, M_NFSCLLOCK);
1299         if (other_lop)
1300                 free(other_lop, M_NFSCLLOCK);
1301         return (0);
1302 }
1303
1304 /*
1305  * Release all lockowners marked in progess for this process and file.
1306  */
1307 void
1308 nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p,
1309     void *id, int flags)
1310 {
1311         struct nfsclowner *owp;
1312         struct nfsclopen *op;
1313         struct nfscllockowner *lp;
1314         struct nfsnode *np;
1315         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1316
1317         np = VTONFS(vp);
1318         nfscl_filllockowner(id, own, flags);
1319         NFSLOCKCLSTATE();
1320         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1321             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1322                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1323                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1324                     LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1325                         if (lp->nfsl_inprog == p &&
1326                             !NFSBCMP(lp->nfsl_owner, own,
1327                             NFSV4CL_LOCKNAMELEN)) {
1328                             lp->nfsl_inprog = NULL;
1329                             nfscl_lockunlock(&lp->nfsl_rwlock);
1330                         }
1331                     }
1332                 }
1333             }
1334         }
1335         nfscl_clrelease(clp);
1336         NFSUNLOCKCLSTATE();
1337 }
1338
1339 /*
1340  * Called to find out if any bytes within the byte range specified are
1341  * write locked by the calling process. Used to determine if flushing
1342  * is required before a LockU.
1343  * If in doubt, return 1, so the flush will occur.
1344  */
1345 int
1346 nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
1347     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
1348 {
1349         struct nfsclowner *owp;
1350         struct nfscllockowner *lp;
1351         struct nfsclopen *op;
1352         struct nfsclclient *clp;
1353         struct nfscllock *lop;
1354         struct nfscldeleg *dp;
1355         struct nfsnode *np;
1356         u_int64_t off, end;
1357         u_int8_t own[NFSV4CL_LOCKNAMELEN];
1358         int error = 0;
1359
1360         np = VTONFS(vp);
1361         switch (fl->l_whence) {
1362         case SEEK_SET:
1363         case SEEK_CUR:
1364                 /*
1365                  * Caller is responsible for adding any necessary offset
1366                  * when SEEK_CUR is used.
1367                  */
1368                 off = fl->l_start;
1369                 break;
1370         case SEEK_END:
1371                 off = np->n_size + fl->l_start;
1372                 break;
1373         default:
1374                 return (1);
1375         }
1376         if (fl->l_len != 0) {
1377                 end = off + fl->l_len;
1378                 if (end < off)
1379                         return (1);
1380         } else {
1381                 end = NFS64BITSSET;
1382         }
1383
1384         error = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
1385         if (error)
1386                 return (1);
1387         nfscl_filllockowner(id, own, flags);
1388         NFSLOCKCLSTATE();
1389
1390         /*
1391          * First check the delegation locks.
1392          */
1393         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
1394         if (dp != NULL) {
1395                 LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
1396                         if (!NFSBCMP(lp->nfsl_owner, own,
1397                             NFSV4CL_LOCKNAMELEN))
1398                                 break;
1399                 }
1400                 if (lp != NULL) {
1401                         LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1402                                 if (lop->nfslo_first >= end)
1403                                         break;
1404                                 if (lop->nfslo_end <= off)
1405                                         continue;
1406                                 if (lop->nfslo_type == F_WRLCK) {
1407                                         nfscl_clrelease(clp);
1408                                         NFSUNLOCKCLSTATE();
1409                                         return (1);
1410                                 }
1411                         }
1412                 }
1413         }
1414
1415         /*
1416          * Now, check state against the server.
1417          */
1418         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
1419             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1420                 if (op->nfso_fhlen == np->n_fhp->nfh_len &&
1421                     !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) {
1422                     LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1423                         if (!NFSBCMP(lp->nfsl_owner, own,
1424                             NFSV4CL_LOCKNAMELEN))
1425                             break;
1426                     }
1427                     if (lp != NULL) {
1428                         LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
1429                             if (lop->nfslo_first >= end)
1430                                 break;
1431                             if (lop->nfslo_end <= off)
1432                                 continue;
1433                             if (lop->nfslo_type == F_WRLCK) {
1434                                 nfscl_clrelease(clp);
1435                                 NFSUNLOCKCLSTATE();
1436                                 return (1);
1437                             }
1438                         }
1439                     }
1440                 }
1441             }
1442         }
1443         nfscl_clrelease(clp);
1444         NFSUNLOCKCLSTATE();
1445         return (0);
1446 }
1447
1448 /*
1449  * Release a byte range lock owner structure.
1450  */
1451 void
1452 nfscl_lockrelease(struct nfscllockowner *lp, int error, int candelete)
1453 {
1454         struct nfsclclient *clp;
1455
1456         if (lp == NULL)
1457                 return;
1458         NFSLOCKCLSTATE();
1459         clp = lp->nfsl_open->nfso_own->nfsow_clp;
1460         if (error != 0 && candelete &&
1461             (lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED) == 0)
1462                 nfscl_freelockowner(lp, 0);
1463         else
1464                 nfscl_lockunlock(&lp->nfsl_rwlock);
1465         nfscl_clrelease(clp);
1466         NFSUNLOCKCLSTATE();
1467 }
1468
1469 /*
1470  * Free up an open structure and any associated byte range lock structures.
1471  */
1472 void
1473 nfscl_freeopen(struct nfsclopen *op, int local)
1474 {
1475
1476         LIST_REMOVE(op, nfso_list);
1477         nfscl_freealllocks(&op->nfso_lock, local);
1478         free(op, M_NFSCLOPEN);
1479         if (local)
1480                 nfsstatsv1.cllocalopens--;
1481         else
1482                 nfsstatsv1.clopens--;
1483 }
1484
1485 /*
1486  * Free up all lock owners and associated locks.
1487  */
1488 static void
1489 nfscl_freealllocks(struct nfscllockownerhead *lhp, int local)
1490 {
1491         struct nfscllockowner *lp, *nlp;
1492
1493         LIST_FOREACH_SAFE(lp, lhp, nfsl_list, nlp) {
1494                 if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1495                         panic("nfscllckw");
1496                 nfscl_freelockowner(lp, local);
1497         }
1498 }
1499
1500 /*
1501  * Called for an Open when NFSERR_EXPIRED is received from the server.
1502  * If there are no byte range locks nor a Share Deny lost, try to do a
1503  * fresh Open. Otherwise, free the open.
1504  */
1505 static int
1506 nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op,
1507     struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
1508 {
1509         struct nfscllockowner *lp;
1510         struct nfscldeleg *dp;
1511         int mustdelete = 0, error;
1512
1513         /*
1514          * Look for any byte range lock(s).
1515          */
1516         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
1517                 if (!LIST_EMPTY(&lp->nfsl_lock)) {
1518                         mustdelete = 1;
1519                         break;
1520                 }
1521         }
1522
1523         /*
1524          * If no byte range lock(s) nor a Share deny, try to re-open.
1525          */
1526         if (!mustdelete && (op->nfso_mode & NFSLCK_DENYBITS) == 0) {
1527                 newnfs_copycred(&op->nfso_cred, cred);
1528                 dp = NULL;
1529                 error = nfsrpc_reopen(nmp, op->nfso_fh,
1530                     op->nfso_fhlen, op->nfso_mode, op, &dp, cred, p);
1531                 if (error) {
1532                         mustdelete = 1;
1533                         if (dp != NULL) {
1534                                 free(dp, M_NFSCLDELEG);
1535                                 dp = NULL;
1536                         }
1537                 }
1538                 if (dp != NULL)
1539                         nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh,
1540                             op->nfso_fhlen, cred, p, &dp);
1541         }
1542
1543         /*
1544          * If a byte range lock or Share deny or couldn't re-open, free it.
1545          */
1546         if (mustdelete)
1547                 nfscl_freeopen(op, 0);
1548         return (mustdelete);
1549 }
1550
1551 /*
1552  * Free up an open owner structure.
1553  */
1554 static void
1555 nfscl_freeopenowner(struct nfsclowner *owp, int local)
1556 {
1557
1558         LIST_REMOVE(owp, nfsow_list);
1559         free(owp, M_NFSCLOWNER);
1560         if (local)
1561                 nfsstatsv1.cllocalopenowners--;
1562         else
1563                 nfsstatsv1.clopenowners--;
1564 }
1565
1566 /*
1567  * Free up a byte range lock owner structure.
1568  */
1569 void
1570 nfscl_freelockowner(struct nfscllockowner *lp, int local)
1571 {
1572         struct nfscllock *lop, *nlop;
1573
1574         LIST_REMOVE(lp, nfsl_list);
1575         LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
1576                 nfscl_freelock(lop, local);
1577         }
1578         free(lp, M_NFSCLLOCKOWNER);
1579         if (local)
1580                 nfsstatsv1.cllocallockowners--;
1581         else
1582                 nfsstatsv1.cllockowners--;
1583 }
1584
1585 /*
1586  * Free up a byte range lock structure.
1587  */
1588 void
1589 nfscl_freelock(struct nfscllock *lop, int local)
1590 {
1591
1592         LIST_REMOVE(lop, nfslo_list);
1593         free(lop, M_NFSCLLOCK);
1594         if (local)
1595                 nfsstatsv1.cllocallocks--;
1596         else
1597                 nfsstatsv1.cllocks--;
1598 }
1599
1600 /*
1601  * Clean out the state related to a delegation.
1602  */
1603 static void
1604 nfscl_cleandeleg(struct nfscldeleg *dp)
1605 {
1606         struct nfsclowner *owp, *nowp;
1607         struct nfsclopen *op;
1608
1609         LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
1610                 op = LIST_FIRST(&owp->nfsow_open);
1611                 if (op != NULL) {
1612                         if (LIST_NEXT(op, nfso_list) != NULL)
1613                                 panic("nfscleandel");
1614                         nfscl_freeopen(op, 1);
1615                 }
1616                 nfscl_freeopenowner(owp, 1);
1617         }
1618         nfscl_freealllocks(&dp->nfsdl_lock, 1);
1619 }
1620
1621 /*
1622  * Free a delegation.
1623  */
1624 static void
1625 nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp)
1626 {
1627
1628         TAILQ_REMOVE(hdp, dp, nfsdl_list);
1629         LIST_REMOVE(dp, nfsdl_hash);
1630         free(dp, M_NFSCLDELEG);
1631         nfsstatsv1.cldelegates--;
1632         nfscl_delegcnt--;
1633 }
1634
1635 /*
1636  * Free up all state related to this client structure.
1637  */
1638 static void
1639 nfscl_cleanclient(struct nfsclclient *clp)
1640 {
1641         struct nfsclowner *owp, *nowp;
1642         struct nfsclopen *op, *nop;
1643         struct nfscllayout *lyp, *nlyp;
1644         struct nfscldevinfo *dip, *ndip;
1645
1646         TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
1647                 nfscl_freelayout(lyp);
1648
1649         LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip)
1650                 nfscl_freedevinfo(dip);
1651
1652         /* Now, all the OpenOwners, etc. */
1653         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1654                 LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1655                         nfscl_freeopen(op, 0);
1656                 }
1657                 nfscl_freeopenowner(owp, 0);
1658         }
1659 }
1660
1661 /*
1662  * Called when an NFSERR_EXPIRED is received from the server.
1663  */
1664 static void
1665 nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp,
1666     struct ucred *cred, NFSPROC_T *p)
1667 {
1668         struct nfsclowner *owp, *nowp, *towp;
1669         struct nfsclopen *op, *nop, *top;
1670         struct nfscldeleg *dp, *ndp;
1671         int ret, printed = 0;
1672
1673         /*
1674          * First, merge locally issued Opens into the list for the server.
1675          */
1676         dp = TAILQ_FIRST(&clp->nfsc_deleg);
1677         while (dp != NULL) {
1678             ndp = TAILQ_NEXT(dp, nfsdl_list);
1679             owp = LIST_FIRST(&dp->nfsdl_owner);
1680             while (owp != NULL) {
1681                 nowp = LIST_NEXT(owp, nfsow_list);
1682                 op = LIST_FIRST(&owp->nfsow_open);
1683                 if (op != NULL) {
1684                     if (LIST_NEXT(op, nfso_list) != NULL)
1685                         panic("nfsclexp");
1686                     LIST_FOREACH(towp, &clp->nfsc_owner, nfsow_list) {
1687                         if (!NFSBCMP(towp->nfsow_owner, owp->nfsow_owner,
1688                             NFSV4CL_LOCKNAMELEN))
1689                             break;
1690                     }
1691                     if (towp != NULL) {
1692                         /* Merge opens in */
1693                         LIST_FOREACH(top, &towp->nfsow_open, nfso_list) {
1694                             if (top->nfso_fhlen == op->nfso_fhlen &&
1695                                 !NFSBCMP(top->nfso_fh, op->nfso_fh,
1696                                  op->nfso_fhlen)) {
1697                                 top->nfso_mode |= op->nfso_mode;
1698                                 top->nfso_opencnt += op->nfso_opencnt;
1699                                 break;
1700                             }
1701                         }
1702                         if (top == NULL) {
1703                             /* Just add the open to the owner list */
1704                             LIST_REMOVE(op, nfso_list);
1705                             op->nfso_own = towp;
1706                             LIST_INSERT_HEAD(&towp->nfsow_open, op, nfso_list);
1707                             nfsstatsv1.cllocalopens--;
1708                             nfsstatsv1.clopens++;
1709                         }
1710                     } else {
1711                         /* Just add the openowner to the client list */
1712                         LIST_REMOVE(owp, nfsow_list);
1713                         owp->nfsow_clp = clp;
1714                         LIST_INSERT_HEAD(&clp->nfsc_owner, owp, nfsow_list);
1715                         nfsstatsv1.cllocalopenowners--;
1716                         nfsstatsv1.clopenowners++;
1717                         nfsstatsv1.cllocalopens--;
1718                         nfsstatsv1.clopens++;
1719                     }
1720                 }
1721                 owp = nowp;
1722             }
1723             if (!printed && !LIST_EMPTY(&dp->nfsdl_lock)) {
1724                 printed = 1;
1725                 printf("nfsv4 expired locks lost\n");
1726             }
1727             nfscl_cleandeleg(dp);
1728             nfscl_freedeleg(&clp->nfsc_deleg, dp);
1729             dp = ndp;
1730         }
1731         if (!TAILQ_EMPTY(&clp->nfsc_deleg))
1732             panic("nfsclexp");
1733
1734         /*
1735          * Now, try and reopen against the server.
1736          */
1737         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1738                 owp->nfsow_seqid = 0;
1739                 LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
1740                         ret = nfscl_expireopen(clp, op, nmp, cred, p);
1741                         if (ret && !printed) {
1742                                 printed = 1;
1743                                 printf("nfsv4 expired locks lost\n");
1744                         }
1745                 }
1746                 if (LIST_EMPTY(&owp->nfsow_open))
1747                         nfscl_freeopenowner(owp, 0);
1748         }
1749 }
1750
1751 /*
1752  * This function must be called after the process represented by "own" has
1753  * exited. Must be called with CLSTATE lock held.
1754  */
1755 static void
1756 nfscl_cleanup_common(struct nfsclclient *clp, u_int8_t *own)
1757 {
1758         struct nfsclowner *owp, *nowp;
1759         struct nfscllockowner *lp, *nlp;
1760         struct nfscldeleg *dp;
1761
1762         /* First, get rid of local locks on delegations. */
1763         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1764                 LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1765                     if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
1766                         if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
1767                             panic("nfscllckw");
1768                         nfscl_freelockowner(lp, 1);
1769                     }
1770                 }
1771         }
1772         owp = LIST_FIRST(&clp->nfsc_owner);
1773         while (owp != NULL) {
1774                 nowp = LIST_NEXT(owp, nfsow_list);
1775                 if (!NFSBCMP(owp->nfsow_owner, own,
1776                     NFSV4CL_LOCKNAMELEN)) {
1777                         /*
1778                          * If there are children that haven't closed the
1779                          * file descriptors yet, the opens will still be
1780                          * here. For that case, let the renew thread clear
1781                          * out the OpenOwner later.
1782                          */
1783                         if (LIST_EMPTY(&owp->nfsow_open))
1784                                 nfscl_freeopenowner(owp, 0);
1785                         else
1786                                 owp->nfsow_defunct = 1;
1787                 }
1788                 owp = nowp;
1789         }
1790 }
1791
1792 /*
1793  * Find open/lock owners for processes that have exited.
1794  */
1795 static void
1796 nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp)
1797 {
1798         struct nfsclowner *owp, *nowp;
1799         struct nfsclopen *op;
1800         struct nfscllockowner *lp, *nlp;
1801         struct nfscldeleg *dp;
1802
1803         /*
1804          * All the pidhash locks must be acquired, since they are sx locks
1805          * and must be acquired before the mutexes.  The pid(s) that will
1806          * be used aren't known yet, so all the locks need to be acquired.
1807          * Fortunately, this function is only performed once/sec.
1808          */
1809         pidhash_slockall();
1810         NFSLOCKCLSTATE();
1811         LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
1812                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
1813                         LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) {
1814                                 if (LIST_EMPTY(&lp->nfsl_lock))
1815                                         nfscl_emptylockowner(lp, lhp);
1816                         }
1817                 }
1818                 if (nfscl_procdoesntexist(owp->nfsow_owner))
1819                         nfscl_cleanup_common(clp, owp->nfsow_owner);
1820         }
1821
1822         /*
1823          * For the single open_owner case, these lock owners need to be
1824          * checked to see if they still exist separately.
1825          * This is because nfscl_procdoesntexist() never returns true for
1826          * the single open_owner so that the above doesn't ever call
1827          * nfscl_cleanup_common().
1828          */
1829         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
1830                 LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
1831                         if (nfscl_procdoesntexist(lp->nfsl_owner))
1832                                 nfscl_cleanup_common(clp, lp->nfsl_owner);
1833                 }
1834         }
1835         NFSUNLOCKCLSTATE();
1836         pidhash_sunlockall();
1837 }
1838
1839 /*
1840  * Take the empty lock owner and move it to the local lhp list if the
1841  * associated process no longer exists.
1842  */
1843 static void
1844 nfscl_emptylockowner(struct nfscllockowner *lp,
1845     struct nfscllockownerfhhead *lhp)
1846 {
1847         struct nfscllockownerfh *lfhp, *mylfhp;
1848         struct nfscllockowner *nlp;
1849         int fnd_it;
1850
1851         /* If not a Posix lock owner, just return. */
1852         if ((lp->nfsl_lockflags & F_POSIX) == 0)
1853                 return;
1854
1855         fnd_it = 0;
1856         mylfhp = NULL;
1857         /*
1858          * First, search to see if this lock owner is already in the list.
1859          * If it is, then the associated process no longer exists.
1860          */
1861         SLIST_FOREACH(lfhp, lhp, nfslfh_list) {
1862                 if (lfhp->nfslfh_len == lp->nfsl_open->nfso_fhlen &&
1863                     !NFSBCMP(lfhp->nfslfh_fh, lp->nfsl_open->nfso_fh,
1864                     lfhp->nfslfh_len))
1865                         mylfhp = lfhp;
1866                 LIST_FOREACH(nlp, &lfhp->nfslfh_lock, nfsl_list)
1867                         if (!NFSBCMP(nlp->nfsl_owner, lp->nfsl_owner,
1868                             NFSV4CL_LOCKNAMELEN))
1869                                 fnd_it = 1;
1870         }
1871         /* If not found, check if process still exists. */
1872         if (fnd_it == 0 && nfscl_procdoesntexist(lp->nfsl_owner) == 0)
1873                 return;
1874
1875         /* Move the lock owner over to the local list. */
1876         if (mylfhp == NULL) {
1877                 mylfhp = malloc(sizeof(struct nfscllockownerfh), M_TEMP,
1878                     M_NOWAIT);
1879                 if (mylfhp == NULL)
1880                         return;
1881                 mylfhp->nfslfh_len = lp->nfsl_open->nfso_fhlen;
1882                 NFSBCOPY(lp->nfsl_open->nfso_fh, mylfhp->nfslfh_fh,
1883                     mylfhp->nfslfh_len);
1884                 LIST_INIT(&mylfhp->nfslfh_lock);
1885                 SLIST_INSERT_HEAD(lhp, mylfhp, nfslfh_list);
1886         }
1887         LIST_REMOVE(lp, nfsl_list);
1888         LIST_INSERT_HEAD(&mylfhp->nfslfh_lock, lp, nfsl_list);
1889 }
1890
1891 static int      fake_global;    /* Used to force visibility of MNTK_UNMOUNTF */
1892 /*
1893  * Called from nfs umount to free up the clientid.
1894  */
1895 void
1896 nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p)
1897 {
1898         struct nfsclclient *clp;
1899         struct ucred *cred;
1900         int igotlock;
1901
1902         /*
1903          * For the case that matters, this is the thread that set
1904          * MNTK_UNMOUNTF, so it will see it set. The code that follows is
1905          * done to ensure that any thread executing nfscl_getcl() after
1906          * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the
1907          * mutex for NFSLOCKCLSTATE(), so it is "m" for the following
1908          * explanation, courtesy of Alan Cox.
1909          * What follows is a snippet from Alan Cox's email at:
1910          * https://docs.FreeBSD.org/cgi/mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw
1911          * 
1912          * 1. Set MNTK_UNMOUNTF
1913          * 2. Acquire a standard FreeBSD mutex "m".
1914          * 3. Update some data structures.
1915          * 4. Release mutex "m".
1916          * 
1917          * Then, other threads that acquire "m" after step 4 has occurred will
1918          * see MNTK_UNMOUNTF as set.  But, other threads that beat thread X to
1919          * step 2 may or may not see MNTK_UNMOUNTF as set.
1920          */
1921         NFSLOCKCLSTATE();
1922         if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1923                 fake_global++;
1924                 NFSUNLOCKCLSTATE();
1925                 NFSLOCKCLSTATE();
1926         }
1927
1928         clp = nmp->nm_clp;
1929         if (clp != NULL) {
1930                 if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0)
1931                         panic("nfscl umount");
1932
1933                 /*
1934                  * First, handshake with the nfscl renew thread, to terminate
1935                  * it.
1936                  */
1937                 clp->nfsc_flags |= NFSCLFLAGS_UMOUNT;
1938                 while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD)
1939                         (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT,
1940                             "nfsclumnt", hz);
1941
1942                 /*
1943                  * Now, get the exclusive lock on the client state, so
1944                  * that no uses of the state are still in progress.
1945                  */
1946                 do {
1947                         igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
1948                             NFSCLSTATEMUTEXPTR, NULL);
1949                 } while (!igotlock);
1950                 NFSUNLOCKCLSTATE();
1951
1952                 /*
1953                  * Free up all the state. It will expire on the server, but
1954                  * maybe we should do a SetClientId/SetClientIdConfirm so
1955                  * the server throws it away?
1956                  */
1957                 LIST_REMOVE(clp, nfsc_list);
1958                 nfscl_delegreturnall(clp, p);
1959                 cred = newnfs_getcred();
1960                 if (NFSHASNFSV4N(nmp)) {
1961                         (void)nfsrpc_destroysession(nmp, clp, cred, p);
1962                         (void)nfsrpc_destroyclient(nmp, clp, cred, p);
1963                 } else
1964                         (void)nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
1965                 nfscl_cleanclient(clp);
1966                 nmp->nm_clp = NULL;
1967                 NFSFREECRED(cred);
1968                 free(clp, M_NFSCLCLIENT);
1969         } else
1970                 NFSUNLOCKCLSTATE();
1971 }
1972
1973 /*
1974  * This function is called when a server replies with NFSERR_STALECLIENTID
1975  * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists,
1976  * doing Opens and Locks with reclaim. If these fail, it deletes the
1977  * corresponding state.
1978  */
1979 static void
1980 nfscl_recover(struct nfsclclient *clp, bool *retokp, struct ucred *cred,
1981     NFSPROC_T *p)
1982 {
1983         struct nfsclowner *owp, *nowp;
1984         struct nfsclopen *op, *nop;
1985         struct nfscllockowner *lp, *nlp;
1986         struct nfscllock *lop, *nlop;
1987         struct nfscldeleg *dp, *ndp, *tdp;
1988         struct nfsmount *nmp;
1989         struct ucred *tcred;
1990         struct nfsclopenhead extra_open;
1991         struct nfscldeleghead extra_deleg;
1992         struct nfsreq *rep;
1993         u_int64_t len;
1994         u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode;
1995         int i, igotlock = 0, error, trycnt, firstlock;
1996         struct nfscllayout *lyp, *nlyp;
1997
1998         /*
1999          * First, lock the client structure, so everyone else will
2000          * block when trying to use state.
2001          */
2002         NFSLOCKCLSTATE();
2003         clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2004         do {
2005                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2006                     NFSCLSTATEMUTEXPTR, NULL);
2007         } while (!igotlock);
2008         NFSUNLOCKCLSTATE();
2009
2010         nmp = clp->nfsc_nmp;
2011         if (nmp == NULL)
2012                 panic("nfscl recover");
2013
2014         /*
2015          * For now, just get rid of all layouts. There may be a need
2016          * to do LayoutCommit Ops with reclaim == true later.
2017          */
2018         TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
2019                 nfscl_freelayout(lyp);
2020         TAILQ_INIT(&clp->nfsc_layout);
2021         for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
2022                 LIST_INIT(&clp->nfsc_layouthash[i]);
2023
2024         trycnt = 5;
2025         tcred = NULL;
2026         do {
2027                 error = nfsrpc_setclient(nmp, clp, 1, retokp, cred, p);
2028         } while ((error == NFSERR_STALECLIENTID ||
2029              error == NFSERR_BADSESSION ||
2030              error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
2031         if (error) {
2032                 NFSLOCKCLSTATE();
2033                 clp->nfsc_flags &= ~(NFSCLFLAGS_RECOVER |
2034                     NFSCLFLAGS_RECVRINPROG);
2035                 wakeup(&clp->nfsc_flags);
2036                 nfsv4_unlock(&clp->nfsc_lock, 0);
2037                 NFSUNLOCKCLSTATE();
2038                 return;
2039         }
2040         clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2041         clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2042
2043         /*
2044          * Mark requests already queued on the server, so that they don't
2045          * initiate another recovery cycle. Any requests already in the
2046          * queue that handle state information will have the old stale
2047          * clientid/stateid and will get a NFSERR_STALESTATEID,
2048          * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server.
2049          * This will be translated to NFSERR_STALEDONTRECOVER when
2050          * R_DONTRECOVER is set.
2051          */
2052         NFSLOCKREQ();
2053         TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) {
2054                 if (rep->r_nmp == nmp)
2055                         rep->r_flags |= R_DONTRECOVER;
2056         }
2057         NFSUNLOCKREQ();
2058
2059         /*
2060          * If nfsrpc_setclient() returns *retokp == true,
2061          * no more recovery is needed.
2062          */
2063         if (*retokp)
2064                 goto out;
2065
2066         /*
2067          * Now, mark all delegations "need reclaim".
2068          */
2069         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list)
2070                 dp->nfsdl_flags |= NFSCLDL_NEEDRECLAIM;
2071
2072         TAILQ_INIT(&extra_deleg);
2073         LIST_INIT(&extra_open);
2074         /*
2075          * Now traverse the state lists, doing Open and Lock Reclaims.
2076          */
2077         tcred = newnfs_getcred();
2078         owp = LIST_FIRST(&clp->nfsc_owner);
2079         while (owp != NULL) {
2080             nowp = LIST_NEXT(owp, nfsow_list);
2081             owp->nfsow_seqid = 0;
2082             op = LIST_FIRST(&owp->nfsow_open);
2083             while (op != NULL) {
2084                 nop = LIST_NEXT(op, nfso_list);
2085                 if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2086                     /* Search for a delegation to reclaim with the open */
2087                     TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2088                         if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2089                             continue;
2090                         if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2091                             mode = NFSV4OPEN_ACCESSWRITE;
2092                             delegtype = NFSV4OPEN_DELEGATEWRITE;
2093                         } else {
2094                             mode = NFSV4OPEN_ACCESSREAD;
2095                             delegtype = NFSV4OPEN_DELEGATEREAD;
2096                         }
2097                         if ((op->nfso_mode & mode) == mode &&
2098                             op->nfso_fhlen == dp->nfsdl_fhlen &&
2099                             !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen))
2100                             break;
2101                     }
2102                     ndp = dp;
2103                     if (dp == NULL)
2104                         delegtype = NFSV4OPEN_DELEGATENONE;
2105                     newnfs_copycred(&op->nfso_cred, tcred);
2106                     error = nfscl_tryopen(nmp, NULL, op->nfso_fh,
2107                         op->nfso_fhlen, op->nfso_fh, op->nfso_fhlen,
2108                         op->nfso_mode, op, NULL, 0, &ndp, 1, delegtype,
2109                         tcred, p);
2110                     if (!error) {
2111                         /* Handle any replied delegation */
2112                         if (ndp != NULL && ((ndp->nfsdl_flags & NFSCLDL_WRITE)
2113                             || NFSMNT_RDONLY(nmp->nm_mountp))) {
2114                             if ((ndp->nfsdl_flags & NFSCLDL_WRITE))
2115                                 mode = NFSV4OPEN_ACCESSWRITE;
2116                             else
2117                                 mode = NFSV4OPEN_ACCESSREAD;
2118                             TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2119                                 if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM))
2120                                     continue;
2121                                 if ((op->nfso_mode & mode) == mode &&
2122                                     op->nfso_fhlen == dp->nfsdl_fhlen &&
2123                                     !NFSBCMP(op->nfso_fh, dp->nfsdl_fh,
2124                                     op->nfso_fhlen)) {
2125                                     dp->nfsdl_stateid = ndp->nfsdl_stateid;
2126                                     dp->nfsdl_sizelimit = ndp->nfsdl_sizelimit;
2127                                     dp->nfsdl_ace = ndp->nfsdl_ace;
2128                                     dp->nfsdl_change = ndp->nfsdl_change;
2129                                     dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2130                                     if ((ndp->nfsdl_flags & NFSCLDL_RECALL))
2131                                         dp->nfsdl_flags |= NFSCLDL_RECALL;
2132                                     free(ndp, M_NFSCLDELEG);
2133                                     ndp = NULL;
2134                                     break;
2135                                 }
2136                             }
2137                         }
2138                         if (ndp != NULL)
2139                             TAILQ_INSERT_HEAD(&extra_deleg, ndp, nfsdl_list);
2140
2141                         /* and reclaim all byte range locks */
2142                         lp = LIST_FIRST(&op->nfso_lock);
2143                         while (lp != NULL) {
2144                             nlp = LIST_NEXT(lp, nfsl_list);
2145                             lp->nfsl_seqid = 0;
2146                             firstlock = 1;
2147                             lop = LIST_FIRST(&lp->nfsl_lock);
2148                             while (lop != NULL) {
2149                                 nlop = LIST_NEXT(lop, nfslo_list);
2150                                 if (lop->nfslo_end == NFS64BITSSET)
2151                                     len = NFS64BITSSET;
2152                                 else
2153                                     len = lop->nfslo_end - lop->nfslo_first;
2154                                 error = nfscl_trylock(nmp, NULL,
2155                                     op->nfso_fh, op->nfso_fhlen, lp,
2156                                     firstlock, 1, lop->nfslo_first, len,
2157                                     lop->nfslo_type, tcred, p);
2158                                 if (error != 0)
2159                                     nfscl_freelock(lop, 0);
2160                                 else
2161                                     firstlock = 0;
2162                                 lop = nlop;
2163                             }
2164                             /* If no locks, but a lockowner, just delete it. */
2165                             if (LIST_EMPTY(&lp->nfsl_lock))
2166                                 nfscl_freelockowner(lp, 0);
2167                             lp = nlp;
2168                         }
2169                     }
2170                 }
2171                 if (error != 0 && error != NFSERR_BADSESSION)
2172                     nfscl_freeopen(op, 0);
2173                 op = nop;
2174             }
2175             owp = nowp;
2176         }
2177
2178         /*
2179          * Now, try and get any delegations not yet reclaimed by cobbling
2180          * together an appropriate open.
2181          */
2182         nowp = NULL;
2183         dp = TAILQ_FIRST(&clp->nfsc_deleg);
2184         while (dp != NULL) {
2185             ndp = TAILQ_NEXT(dp, nfsdl_list);
2186             if ((dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) {
2187                 if (nowp == NULL) {
2188                     nowp = malloc(
2189                         sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK);
2190                     /*
2191                      * Name must be as long as the largest possible
2192                      * NFSV4CL_LOCKNAMELEN. 12 for now.
2193                      */
2194                     NFSBCOPY("RECLAIMDELEG", nowp->nfsow_owner,
2195                         NFSV4CL_LOCKNAMELEN);
2196                     LIST_INIT(&nowp->nfsow_open);
2197                     nowp->nfsow_clp = clp;
2198                     nowp->nfsow_seqid = 0;
2199                     nowp->nfsow_defunct = 0;
2200                     nfscl_lockinit(&nowp->nfsow_rwlock);
2201                 }
2202                 nop = NULL;
2203                 if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) {
2204                     nop = malloc(sizeof (struct nfsclopen) +
2205                         dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
2206                     nop->nfso_own = nowp;
2207                     if ((dp->nfsdl_flags & NFSCLDL_WRITE)) {
2208                         nop->nfso_mode = NFSV4OPEN_ACCESSWRITE;
2209                         delegtype = NFSV4OPEN_DELEGATEWRITE;
2210                     } else {
2211                         nop->nfso_mode = NFSV4OPEN_ACCESSREAD;
2212                         delegtype = NFSV4OPEN_DELEGATEREAD;
2213                     }
2214                     nop->nfso_opencnt = 0;
2215                     nop->nfso_posixlock = 1;
2216                     nop->nfso_fhlen = dp->nfsdl_fhlen;
2217                     NFSBCOPY(dp->nfsdl_fh, nop->nfso_fh, dp->nfsdl_fhlen);
2218                     LIST_INIT(&nop->nfso_lock);
2219                     nop->nfso_stateid.seqid = 0;
2220                     nop->nfso_stateid.other[0] = 0;
2221                     nop->nfso_stateid.other[1] = 0;
2222                     nop->nfso_stateid.other[2] = 0;
2223                     newnfs_copycred(&dp->nfsdl_cred, tcred);
2224                     newnfs_copyincred(tcred, &nop->nfso_cred);
2225                     tdp = NULL;
2226                     error = nfscl_tryopen(nmp, NULL, nop->nfso_fh,
2227                         nop->nfso_fhlen, nop->nfso_fh, nop->nfso_fhlen,
2228                         nop->nfso_mode, nop, NULL, 0, &tdp, 1,
2229                         delegtype, tcred, p);
2230                     if (tdp != NULL) {
2231                         if ((tdp->nfsdl_flags & NFSCLDL_WRITE))
2232                             mode = NFSV4OPEN_ACCESSWRITE;
2233                         else
2234                             mode = NFSV4OPEN_ACCESSREAD;
2235                         if ((nop->nfso_mode & mode) == mode &&
2236                             nop->nfso_fhlen == tdp->nfsdl_fhlen &&
2237                             !NFSBCMP(nop->nfso_fh, tdp->nfsdl_fh,
2238                             nop->nfso_fhlen)) {
2239                             dp->nfsdl_stateid = tdp->nfsdl_stateid;
2240                             dp->nfsdl_sizelimit = tdp->nfsdl_sizelimit;
2241                             dp->nfsdl_ace = tdp->nfsdl_ace;
2242                             dp->nfsdl_change = tdp->nfsdl_change;
2243                             dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM;
2244                             if ((tdp->nfsdl_flags & NFSCLDL_RECALL))
2245                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
2246                             free(tdp, M_NFSCLDELEG);
2247                         } else {
2248                             TAILQ_INSERT_HEAD(&extra_deleg, tdp, nfsdl_list);
2249                         }
2250                     }
2251                 }
2252                 if (error) {
2253                     if (nop != NULL)
2254                         free(nop, M_NFSCLOPEN);
2255                     /*
2256                      * Couldn't reclaim it, so throw the state
2257                      * away. Ouch!!
2258                      */
2259                     nfscl_cleandeleg(dp);
2260                     nfscl_freedeleg(&clp->nfsc_deleg, dp);
2261                 } else {
2262                     LIST_INSERT_HEAD(&extra_open, nop, nfso_list);
2263                 }
2264             }
2265             dp = ndp;
2266         }
2267
2268         /*
2269          * Now, get rid of extra Opens and Delegations.
2270          */
2271         LIST_FOREACH_SAFE(op, &extra_open, nfso_list, nop) {
2272                 do {
2273                         newnfs_copycred(&op->nfso_cred, tcred);
2274                         error = nfscl_tryclose(op, tcred, nmp, p);
2275                         if (error == NFSERR_GRACE)
2276                                 (void) nfs_catnap(PZERO, error, "nfsexcls");
2277                 } while (error == NFSERR_GRACE);
2278                 LIST_REMOVE(op, nfso_list);
2279                 free(op, M_NFSCLOPEN);
2280         }
2281         if (nowp != NULL)
2282                 free(nowp, M_NFSCLOWNER);
2283
2284         TAILQ_FOREACH_SAFE(dp, &extra_deleg, nfsdl_list, ndp) {
2285                 do {
2286                         newnfs_copycred(&dp->nfsdl_cred, tcred);
2287                         error = nfscl_trydelegreturn(dp, tcred, nmp, p);
2288                         if (error == NFSERR_GRACE)
2289                                 (void) nfs_catnap(PZERO, error, "nfsexdlg");
2290                 } while (error == NFSERR_GRACE);
2291                 TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list);
2292                 free(dp, M_NFSCLDELEG);
2293         }
2294
2295         /* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */
2296         if (NFSHASNFSV4N(nmp))
2297                 (void)nfsrpc_reclaimcomplete(nmp, cred, p);
2298
2299 out:
2300         NFSLOCKCLSTATE();
2301         clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG;
2302         wakeup(&clp->nfsc_flags);
2303         nfsv4_unlock(&clp->nfsc_lock, 0);
2304         NFSUNLOCKCLSTATE();
2305         if (tcred != NULL)
2306                 NFSFREECRED(tcred);
2307 }
2308
2309 /*
2310  * This function is called when a server replies with NFSERR_EXPIRED.
2311  * It deletes all state for the client and does a fresh SetClientId/confirm.
2312  * XXX Someday it should post a signal to the process(es) that hold the
2313  * state, so they know that lock state has been lost.
2314  */
     /*
      * Arguments:
      *   clp     - client structure whose state has expired; may be NULL.
      *   clidrev - the caller's copy of clp->nfsc_clientidrev. If it no
      *             longer matches, nothing is done.
      *   p       - passed through to nfsrpc_setclient() and
      *             nfscl_expireclient().
      * Returns 0 when there was nothing to do or the new SetClientID
      * succeeded, otherwise the error from the last nfsrpc_setclient() call.
      */
2315 int
2316 nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p)
2317 {
2318         struct nfsmount *nmp;
2319         struct ucred *cred;
2320         int igotlock = 0, error, trycnt;
2321
2322         /*
2323          * If the clientid has gone away or a new SetClientid has already
2324          * been done, just return ok.
2325          */
2326         if (clp == NULL || clidrev != clp->nfsc_clientidrev)
2327                 return (0);
2328
2329         /*
2330          * First, lock the client structure, so everyone else will
2331          * block when trying to use state. Also, use NFSCLFLAGS_EXPIREIT so
2332          * that only one thread does the work.
2333          */
2334         NFSLOCKCLSTATE();
2335         clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT;
             /*
              * Keep trying for the exclusive nfsc_lock, but give up as soon
              * as NFSCLFLAGS_EXPIREIT has been cleared, since that is what
              * this function does at its end once the work is finished.
              */
2336         do {
2337                 igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
2338                     NFSCLSTATEMUTEXPTR, NULL);
2339         } while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT));
             /*
              * The flag is clear, so the work has already been done: undo
              * the lock if it was obtained and report success.
              */
2340         if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) {
2341                 if (igotlock)
2342                         nfsv4_unlock(&clp->nfsc_lock, 0);
2343                 NFSUNLOCKCLSTATE();
2344                 return (0);
2345         }
2346         clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG;
2347         NFSUNLOCKCLSTATE();
2348
2349         nmp = clp->nfsc_nmp;
2350         if (nmp == NULL)
2351                 panic("nfscl expired");
2352         cred = newnfs_getcred();
             /*
              * Make at most 5 attempts at nfsrpc_setclient(), retrying only
              * for the three error values tested in the loop condition.
              */
2353         trycnt = 5;
2354         do {
2355                 error = nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
2356         } while ((error == NFSERR_STALECLIENTID ||
2357              error == NFSERR_BADSESSION ||
2358              error == NFSERR_STALEDONTRECOVER) && --trycnt > 0);
             /*
              * Both branches below leave the state mutex locked. It is
              * released after the flags are cleared and nfsc_lock is dropped.
              */
2359         if (error) {
2360                 NFSLOCKCLSTATE();
2361                 clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2362         } else {
2363                 /*
2364                  * Expire the state for the client.
2365                  */
2366                 nfscl_expireclient(clp, nmp, cred, p);
2367                 NFSLOCKCLSTATE();
2368                 clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID;
2369                 clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
2370         }
             /* Let threads sleeping on nfsc_flags know the work is finished. */
2371         clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG);
2372         wakeup(&clp->nfsc_flags);
2373         nfsv4_unlock(&clp->nfsc_lock, 0);
2374         NFSUNLOCKCLSTATE();
2375         NFSFREECRED(cred);
2376         return (error);
2377 }
2378
2379 /*
2380  * This function inserts a lock in the list after insert_lop.
2381  */
2382 static void
2383 nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop,
2384     struct nfscllock *insert_lop, int local)
2385 {
2386
2387         if ((struct nfscllockowner *)insert_lop == lp)
2388                 LIST_INSERT_HEAD(&lp->nfsl_lock, new_lop, nfslo_list);
2389         else
2390                 LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list);
2391         if (local)
2392                 nfsstatsv1.cllocallocks++;
2393         else
2394                 nfsstatsv1.cllocks++;
2395 }
2396
2397 /*
2398  * This function updates the locking for a lock owner and given file. It
2399  * maintains a list of lock ranges ordered on increasing file offset that
2400  * are NFSCLLOCK_READ or NFSCLLOCK_WRITE and non-overlapping (aka POSIX style).
2401  * It always adds new_lop to the list and sometimes uses the one pointed
2402  * at by other_lopp.
2403  * Returns 1 if the locks were modified, 0 otherwise.
2404  */
     /*
      * Arguments:
      *   lp         - lock owner whose nfsl_lock list is updated.
      *   new_lopp   - in/out; the lock or unlock (F_UNLCK) range being
      *                applied. It is set to NULL once the structure has
      *                been linked into the list.
      *   other_lopp - in/out; spare structure used when a lock (not an
      *                unlock) splits an existing range in two. It is set to
      *                NULL when it has been consumed that way.
      *   local      - passed through to nfscl_insertlock() and
      *                nfscl_freelock().
      * NOTE(review): for a non-unlock request "modified" is set to 1
      * unconditionally just before returning, so 0 can only be returned for
      * an unlock.
      * NOTE(review): a structure still referenced by new_lopp or other_lopp
      * on return is presumably freed by the caller - confirm in the callers.
      * NOTE(review): the trimming code assigns one range's end to the
      * neighbouring range's first, so the ranges look half-open - confirm.
      */
2405 static int
2406 nfscl_updatelock(struct nfscllockowner *lp, struct nfscllock **new_lopp,
2407     struct nfscllock **other_lopp, int local)
2408 {
2409         struct nfscllock *new_lop = *new_lopp;
2410         struct nfscllock *lop, *tlop, *ilop;
2411         struct nfscllock *other_lop;
2412         int unlock = 0, modified = 0;
2413         u_int64_t tmp;
2414
2415         /*
2416          * Work down the list until the lock is merged.
2417          */
2418         if (new_lop->nfslo_type == F_UNLCK)
2419                 unlock = 1;
             /*
              * ilop is the entry the new lock will be inserted after. The
              * lock owner pointer itself is the "insert at the head" marker
              * that nfscl_insertlock() tests for.
              */
2420         ilop = (struct nfscllock *)lp;
2421         lop = LIST_FIRST(&lp->nfsl_lock);
2422         while (lop != NULL) {
2423             /*
2424              * Only check locks for this file that aren't before the start of
2425              * new lock's range.
2426              */
2427             if (lop->nfslo_end >= new_lop->nfslo_first) {
2428                 if (new_lop->nfslo_end < lop->nfslo_first) {
2429                     /*
2430                      * If the new lock ends before the start of the
2431                      * current lock's range, no merge, just insert
2432                      * the new lock.
2433                      */
2434                     break;
2435                 }
2436                 if (new_lop->nfslo_type == lop->nfslo_type ||
2437                     (new_lop->nfslo_first <= lop->nfslo_first &&
2438                      new_lop->nfslo_end >= lop->nfslo_end)) {
2439                     /*
2440                      * This lock can be absorbed by the new lock/unlock.
2441                      * This happens when it covers the entire range
2442                      * of the old lock or is contiguous
2443                      * with the old lock and is of the same type or an
2444                      * unlock.
2445                      */
                         /*
                          * Anything other than an exact duplicate of the old
                          * entry counts as a modification.
                          */
2446                     if (new_lop->nfslo_type != lop->nfslo_type ||
2447                         new_lop->nfslo_first != lop->nfslo_first ||
2448                         new_lop->nfslo_end != lop->nfslo_end)
2449                         modified = 1;
                         /* Grow the new range to the union of the two. */
2450                     if (lop->nfslo_first < new_lop->nfslo_first)
2451                         new_lop->nfslo_first = lop->nfslo_first;
2452                     if (lop->nfslo_end > new_lop->nfslo_end)
2453                         new_lop->nfslo_end = lop->nfslo_end;
                         /* Step past the old entry before it is freed. */
2454                     tlop = lop;
2455                     lop = LIST_NEXT(lop, nfslo_list);
2456                     nfscl_freelock(tlop, local);
2457                     continue;
2458                 }
2459
2460                 /*
2461                  * All these cases are for contiguous locks that are not the
2462                  * same type, so they can't be merged.
2463                  */
2464                 if (new_lop->nfslo_first <= lop->nfslo_first) {
2465                     /*
2466                      * This case is where the new lock overlaps with the
2467                      * first part of the old lock. Move the start of the
2468                      * old lock to just past the end of the new lock. The
2469                      * new lock will be inserted in front of the old, since
2470                      * ilop hasn't been updated. (We are done now.)
2471                      */
2472                     if (lop->nfslo_first != new_lop->nfslo_end) {
2473                         lop->nfslo_first = new_lop->nfslo_end;
2474                         modified = 1;
2475                     }
2476                     break;
2477                 }
2478                 if (new_lop->nfslo_end >= lop->nfslo_end) {
2479                     /*
2480                      * This case is where the new lock overlaps with the
2481                      * end of the old lock's range. Move the old lock's
2482                      * end to just before the new lock's first and insert
2483                      * the new lock after the old lock.
2484                      * Might not be done yet, since the new lock could
2485                      * overlap further locks with higher ranges.
2486                      */
2487                     if (lop->nfslo_end != new_lop->nfslo_first) {
2488                         lop->nfslo_end = new_lop->nfslo_first;
2489                         modified = 1;
2490                     }
2491                     ilop = lop;
2492                     lop = LIST_NEXT(lop, nfslo_list);
2493                     continue;
2494                 }
2495                 /*
2496                  * The final case is where the new lock's range is in the
2497                  * middle of the current lock's and splits the current lock
2498                  * up. Use *other_lopp to handle the second part of the
2499                  * split old lock range. (We are done now.)
2500                  * For unlock, we use new_lop as other_lop and tmp, since
2501                  * other_lop and new_lop are the same for this case.
2502                  * We noted the unlock case above, so we don't need
2503                  * new_lop->nfslo_type any longer.
2504                  */
                     /*
                      * Save nfslo_first now: for an unlock, other_lop aliases
                      * new_lop, so the assignments below overwrite it.
                      */
2505                 tmp = new_lop->nfslo_first;
2506                 if (unlock) {
2507                     other_lop = new_lop;
2508                     *new_lopp = NULL;
2509                 } else {
2510                     other_lop = *other_lopp;
2511                     *other_lopp = NULL;
2512                 }
2513                 other_lop->nfslo_first = new_lop->nfslo_end;
2514                 other_lop->nfslo_end = lop->nfslo_end;
2515                 other_lop->nfslo_type = lop->nfslo_type;
2516                 lop->nfslo_end = tmp;
2517                 nfscl_insertlock(lp, other_lop, lop, local);
2518                 ilop = lop;
2519                 modified = 1;
2520                 break;
2521             }
                 /*
                  * This entry lies before the new range, so remember it as
                  * the insertion point and move on.
                  */
2522             ilop = lop;
2523             lop = LIST_NEXT(lop, nfslo_list);
                 /* Same test as the loop condition; ends the walk. */
2524             if (lop == NULL)
2525                 break;
2526         }
2527
2528         /*
2529          * Insert the new lock in the list at the appropriate place.
2530          */
2531         if (!unlock) {
2532                 nfscl_insertlock(lp, new_lop, ilop, local);
2533                 *new_lopp = NULL;
2534                 modified = 1;
2535         }
2536         return (modified);
2537 }
2538
2539 /*
2540  * This function must be run as a kernel thread.
2541  * It does Renew Ops and recovery, when required.
2542  */
     /*
      * Arguments:
      *   clp - the client structure this thread services.
      *   p   - passed to the RPCs and helpers called from here.
      * Each pass of the main loop, in order: runs recovery when
      * NFSCLFLAGS_RECOVER is set, does a Renew once nfsc_expire has passed
      * (plus one for each DS session that is due), frees defunct open owners,
      * recalls delegations, trims delegations above nfscl_deleghighwater,
      * recalls layouts, returns stale layouts, frees unreferenced devinfo
      * structures, returns the layouts and delegations gathered on the local
      * lists, releases the lock owners found by nfscl_cleanupkext() and then
      * sleeps for up to hz ticks unless NFSCLFLAGS_RECOVER is set.
      * The function only returns once NFSCLFLAGS_UMOUNT is seen at the
      * "terminate" label.
      */
2543 void
2544 nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p)
2545 {
2546         struct nfsclowner *owp, *nowp;
2547         struct nfsclopen *op;
2548         struct nfscllockowner *lp, *nlp;
2549         struct nfscldeleghead dh;
2550         struct nfscldeleg *dp, *ndp;
2551         struct ucred *cred;
2552         u_int32_t clidrev;
2553         int error, cbpathdown, islept, igotlock, ret, clearok;
2554         uint32_t recover_done_time = 0;
2555         time_t mytime;
             /*
              * NOTE(review): static, so this value is shared by every
              * invocation of this function rather than kept per client.
              */
2556         static time_t prevsec = 0;
2557         struct nfscllockownerfh *lfhp, *nlfhp;
2558         struct nfscllockownerfhhead lfh;
2559         struct nfscllayout *lyp, *nlyp;
2560         struct nfscldevinfo *dip, *ndip;
2561         struct nfscllayouthead rlh;
2562         struct nfsclrecalllayout *recallp;
2563         struct nfsclds *dsp;
2564         bool retok;
2565         struct mount *mp;
2566         vnode_t vp;
2567
2568         cred = newnfs_getcred();
2569         NFSLOCKCLSTATE();
2570         clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD;
2571         mp = clp->nfsc_nmp->nm_mountp;
2572         NFSUNLOCKCLSTATE();
2573         for(;;) {
2574                 newnfs_setroot(cred);
2575                 cbpathdown = 0;
2576                 if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) {
2577                         /*
2578                          * Only allow one full recover within 1/2 of the lease
2579                          * duration (nfsc_renew).
2580                          * retok is value/result.  If passed in set to true,
2581                          * it indicates only a CreateSession operation should
2582                          * be attempted.
2583                          * If it is returned true, it indicates that the
2584                          * recovery only required a CreateSession.
2585                          */
2586                         retok = true;
2587                         if (recover_done_time < NFSD_MONOSEC) {
2588                                 recover_done_time = NFSD_MONOSEC +
2589                                     clp->nfsc_renew;
2590                                 retok = false;
2591                         }
2592                         NFSCL_DEBUG(1, "Doing recovery, only "
2593                             "createsession=%d\n", retok);
2594                         nfscl_recover(clp, &retok, cred, p);
2595                 }
                     /*
                      * Do a Renew once nfsc_expire has passed. nfsc_expire is
                      * advanced before the RPC and clidrev is sampled first so
                      * nfscl_hasexpired() can tell whether the clientid was
                      * already replaced.
                      */
2596                 if (clp->nfsc_expire <= NFSD_MONOSEC &&
2597                     (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) {
2598                         clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
2599                         clidrev = clp->nfsc_clientidrev;
2600                         error = nfsrpc_renew(clp, NULL, cred, p);
2601                         if (error == NFSERR_CBPATHDOWN)
2602                             cbpathdown = 1;
2603                         else if (error == NFSERR_STALECLIENTID ||
2604                             error == NFSERR_BADSESSION) {
2605                             NFSLOCKCLSTATE();
2606                             clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2607                             NFSUNLOCKCLSTATE();
2608                         } else if (error == NFSERR_EXPIRED)
2609                             (void) nfscl_hasexpired(clp, clidrev, p);
2610                 }
2611
2612 checkdsrenew:
2613                 if (NFSHASNFSV4N(clp->nfsc_nmp)) {
2614                         /* Do renews for any DS sessions. */
2615                         NFSLOCKMNT(clp->nfsc_nmp);
2616                         /* Skip first entry, since the MDS is handled above. */
2617                         dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess);
2618                         if (dsp != NULL)
2619                                 dsp = TAILQ_NEXT(dsp, nfsclds_list);
2620                         while (dsp != NULL) {
2621                                 if (dsp->nfsclds_expire <= NFSD_MONOSEC &&
2622                                     dsp->nfsclds_sess.nfsess_defunct == 0) {
2623                                         dsp->nfsclds_expire = NFSD_MONOSEC +
2624                                             clp->nfsc_renew;
                                             /*
                                              * The mount lock is dropped for
                                              * the RPC, so the scan of the
                                              * session list restarts from the
                                              * top afterwards.
                                              */
2625                                         NFSUNLOCKMNT(clp->nfsc_nmp);
2626                                         (void)nfsrpc_renew(clp, dsp, cred, p);
2627                                         goto checkdsrenew;
2628                                 }
2629                                 dsp = TAILQ_NEXT(dsp, nfsclds_list);
2630                         }
2631                         NFSUNLOCKMNT(clp->nfsc_nmp);
2632                 }
2633
                     /*
                      * dh collects the delegations taken off the client's
                      * list in this pass; they are returned and freed further
                      * down.
                      */
2634                 TAILQ_INIT(&dh);
2635                 NFSLOCKCLSTATE();
2636                 if (cbpathdown)
2637                         /* It's a Total Recall! */
2638                         nfscl_totalrecall(clp);
2639
2640                 /*
2641                  * Now, handle defunct owners.
2642                  */
2643                 LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
2644                         if (LIST_EMPTY(&owp->nfsow_open)) {
2645                                 if (owp->nfsow_defunct != 0)
2646                                         nfscl_freeopenowner(owp, 0);
2647                         }
2648                 }
2649
2650                 /*
2651                  * Do the recall on any delegations. To avoid trouble, always
2652                  * come back up here after having slept.
2653                  */
2654                 igotlock = 0;
2655 tryagain:
2656                 dp = TAILQ_FIRST(&clp->nfsc_deleg);
2657                 while (dp != NULL) {
2658                         ndp = TAILQ_NEXT(dp, nfsdl_list);
2659                         if ((dp->nfsdl_flags & NFSCLDL_RECALL)) {
2660                                 /*
2661                                  * Wait for outstanding I/O ops to be done.
2662                                  */
2663                                 if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
2664                                     if (igotlock) {
2665                                         nfsv4_unlock(&clp->nfsc_lock, 0);
2666                                         igotlock = 0;
2667                                     }
2668                                     dp->nfsdl_rwlock.nfslock_lock |=
2669                                         NFSV4LOCK_WANTED;
2670                                     msleep(&dp->nfsdl_rwlock,
2671                                         NFSCLSTATEMUTEXPTR, PVFS, "nfscld",
2672                                         5 * hz);
2673                                     if (NFSCL_FORCEDISM(mp))
2674                                         goto terminate;
2675                                     goto tryagain;
2676                                 }
                                     /*
                                      * Get the exclusive nfsc_lock. If that
                                      * slept, the list is rescanned from the
                                      * top, possibly with the lock now held.
                                      */
2677                                 while (!igotlock) {
2678                                     igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
2679                                         &islept, NFSCLSTATEMUTEXPTR, mp);
2680                                     if (igotlock == 0 && NFSCL_FORCEDISM(mp))
2681                                         goto terminate;
2682                                     if (islept)
2683                                         goto tryagain;
2684                                 }
2685                                 NFSUNLOCKCLSTATE();
2686                                 newnfs_copycred(&dp->nfsdl_cred, cred);
2687                                 ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp,
2688                                     NULL, cred, p, 1, &vp);
                                     /*
                                      * A zero return moves the delegation to
                                      * dh and adjusts the delegation counters.
                                      */
2689                                 if (!ret) {
2690                                     nfscl_cleandeleg(dp);
2691                                     TAILQ_REMOVE(&clp->nfsc_deleg, dp,
2692                                         nfsdl_list);
2693                                     LIST_REMOVE(dp, nfsdl_hash);
2694                                     TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2695                                     nfscl_delegcnt--;
2696                                     nfsstatsv1.cldelegates--;
2697                                 }
2698                                 NFSLOCKCLSTATE();
2699                                 /*
2700                                  * The nfsc_lock must be released before doing
2701                                  * vrele(), since it might call nfs_inactive().
2702                                  * For the unlikely case where the vnode failed
2703                                  * to be acquired by nfscl_recalldeleg(), a
2704                                  * VOP_RECLAIM() should be in progress and it
2705                                  * will return the delegation.
2706                                  */
2707                                 nfsv4_unlock(&clp->nfsc_lock, 0);
2708                                 igotlock = 0;
2709                                 if (vp != NULL) {
2710                                         NFSUNLOCKCLSTATE();
2711                                         vrele(vp);
2712                                         NFSLOCKCLSTATE();
2713                                 }
2714                                 goto tryagain;
2715                         }
2716                         dp = ndp;
2717                 }
2718
2719                 /*
2720                  * Clear out old delegations, if we are above the high water
2721                  * mark. Only clear out ones with no state related to them.
2722                  * The tailq list is in LRU order.
2723                  */
2724                 dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead);
2725                 while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) {
2726                     ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list);
2727                     if (dp->nfsdl_rwlock.nfslock_usecnt == 0 &&
2728                         dp->nfsdl_rwlock.nfslock_lock == 0 &&
2729                         dp->nfsdl_timestamp < NFSD_MONOSEC &&
2730                         (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_ZAPPED |
2731                           NFSCLDL_NEEDRECLAIM | NFSCLDL_DELEGRET)) == 0) {
                             /*
                              * clearok stays 1 only if no owner on the
                              * delegation has an open and no lock owner has a
                              * lock.
                              */
2732                         clearok = 1;
2733                         LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
2734                             op = LIST_FIRST(&owp->nfsow_open);
2735                             if (op != NULL) {
2736                                 clearok = 0;
2737                                 break;
2738                             }
2739                         }
2740                         if (clearok) {
2741                             LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
2742                                 if (!LIST_EMPTY(&lp->nfsl_lock)) {
2743                                     clearok = 0;
2744                                     break;
2745                                 }
2746                             }
2747                         }
2748                         if (clearok) {
2749                             TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
2750                             LIST_REMOVE(dp, nfsdl_hash);
2751                             TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
2752                             nfscl_delegcnt--;
2753                             nfsstatsv1.cldelegates--;
2754                         }
2755                     }
2756                     dp = ndp;
2757                 }
                     /*
                      * igotlock can still be set here when nfsv4_lock() slept
                      * and the rescan found nothing left to recall.
                      */
2758                 if (igotlock)
2759                         nfsv4_unlock(&clp->nfsc_lock, 0);
2760
2761                 /*
2762                  * Do the recall on any layouts. To avoid trouble, always
2763                  * come back up here after having slept.
2764                  */
2765                 TAILQ_INIT(&rlh);
2766 tryagain2:
2767                 TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) {
2768                         if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) {
2769                                 /*
2770                                  * Wait for outstanding I/O ops to be done.
2771                                  */
2772                                 if (lyp->nfsly_lock.nfslock_usecnt > 0 ||
2773                                     (lyp->nfsly_lock.nfslock_lock &
2774                                      NFSV4LOCK_LOCK) != 0) {
2775                                         lyp->nfsly_lock.nfslock_lock |=
2776                                             NFSV4LOCK_WANTED;
2777                                         msleep(&lyp->nfsly_lock.nfslock_lock,
2778                                             NFSCLSTATEMUTEXPTR, PVFS, "nfslyp",
2779                                             5 * hz);
2780                                         if (NFSCL_FORCEDISM(mp))
2781                                             goto terminate;
2782                                         goto tryagain2;
2783                                 }
2784                                 /* Move the layout to the recall list. */
2785                                 TAILQ_REMOVE(&clp->nfsc_layout, lyp,
2786                                     nfsly_list);
2787                                 LIST_REMOVE(lyp, nfsly_hash);
2788                                 TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list);
2789
2790                                 /* Handle any layout commits. */
2791                                 if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) &&
2792                                     (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
2793                                         lyp->nfsly_flags &= ~NFSLY_WRITTEN;
                                             /*
                                              * The state mutex is dropped for
                                              * the commit, so the layout list
                                              * is rescanned afterwards.
                                              */
2794                                         NFSUNLOCKCLSTATE();
2795                                         NFSCL_DEBUG(3, "do layoutcommit\n");
2796                                         nfscl_dolayoutcommit(clp->nfsc_nmp, lyp,
2797                                             cred, p);
2798                                         NFSLOCKCLSTATE();
2799                                         goto tryagain2;
2800                                 }
2801                         }
2802                 }
2803
2804                 /* Now, look for stale layouts. */
2805                 lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead);
2806                 while (lyp != NULL) {
2807                         nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list);
2808                         if (lyp->nfsly_timestamp < NFSD_MONOSEC &&
2809                             (lyp->nfsly_flags & NFSLY_RECALL) == 0 &&
2810                             lyp->nfsly_lock.nfslock_usecnt == 0 &&
2811                             lyp->nfsly_lock.nfslock_lock == 0) {
2812                                 NFSCL_DEBUG(4, "ret stale lay=%d\n",
2813                                     nfscl_layoutcnt);
                                     /*
                                      * The state mutex is held here, which is
                                      * presumably why M_NOWAIT is used. On
                                      * failure the scan just stops; the loop
                                      * runs again on the next pass.
                                      */
2814                                 recallp = malloc(sizeof(*recallp),
2815                                     M_NFSLAYRECALL, M_NOWAIT);
2816                                 if (recallp == NULL)
2817                                         break;
2818                                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE,
2819                                     lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX,
2820                                     lyp->nfsly_stateid.seqid, 0, 0, NULL,
2821                                     recallp);
2822                         }
2823                         lyp = nlyp;
2824                 }
2825
2826                 /*
2827                  * Free up any unreferenced device info structures.
2828                  */
2829                 LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) {
2830                         if (dip->nfsdi_layoutrefs == 0 &&
2831                             dip->nfsdi_refcnt == 0) {
2832                                 NFSCL_DEBUG(4, "freeing devinfo\n");
2833                                 LIST_REMOVE(dip, nfsdi_list);
2834                                 nfscl_freedevinfo(dip);
2835                         }
2836                 }
2837                 NFSUNLOCKCLSTATE();
2838
2839                 /* Do layout return(s), as required. */
2840                 TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) {
2841                         TAILQ_REMOVE(&rlh, lyp, nfsly_list);
2842                         NFSCL_DEBUG(4, "ret layout\n");
2843                         nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p);
2844                         nfscl_freelayout(lyp);
2845                 }
2846
2847                 /*
2848                  * Delegreturn any delegations cleaned out or recalled.
2849                  */
2850                 TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
2851                         newnfs_copycred(&dp->nfsdl_cred, cred);
2852                         (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
2853                         TAILQ_REMOVE(&dh, dp, nfsdl_list);
2854                         free(dp, M_NFSCLDELEG);
2855                 }
2856
2857                 SLIST_INIT(&lfh);
2858                 /*
2859                  * Call nfscl_cleanupkext() once per second to check for
2860                  * open/lock owners where the process has exited.
2861                  */
2862                 mytime = NFSD_MONOSEC;
2863                 if (prevsec != mytime) {
2864                         prevsec = mytime;
2865                         nfscl_cleanupkext(clp, &lfh);
2866                 }
2867
2868                 /*
2869                  * Do a ReleaseLockOwner for all lock owners where the
2870                  * associated process no longer exists, as found by
2871                  * nfscl_cleanupkext().
2872                  */
2873                 newnfs_setroot(cred);
2874                 SLIST_FOREACH_SAFE(lfhp, &lfh, nfslfh_list, nlfhp) {
2875                         LIST_FOREACH_SAFE(lp, &lfhp->nfslfh_lock, nfsl_list,
2876                             nlp) {
2877                                 (void)nfsrpc_rellockown(clp->nfsc_nmp, lp,
2878                                     lfhp->nfslfh_fh, lfhp->nfslfh_len, cred,
2879                                     p);
2880                                 nfscl_freelockowner(lp, 0);
2881                         }
2882                         free(lfhp, M_TEMP);
2883                 }
2884                 SLIST_INIT(&lfh);
2885
                     /*
                      * Sleep on clp for up to hz ticks, unless recovery has
                      * been requested in the meantime. wakeup() on clp ends
                      * the sleep early.
                      */
2886                 NFSLOCKCLSTATE();
2887                 if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0)
2888                         (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl",
2889                             hz);
                     /*
                      * Reached with the state mutex held, either by falling
                      * through from the sleep above or by a goto after a
                      * forced dismount was detected.
                      */
2890 terminate:
2891                 if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) {
2892                         clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD;
2893                         NFSUNLOCKCLSTATE();
2894                         NFSFREECRED(cred);
2895                         wakeup((caddr_t)clp);
2896                         return;
2897                 }
2898                 NFSUNLOCKCLSTATE();
2899         }
2900 }
2901
2902 /*
2903  * Initiate state recovery. Called when NFSERR_STALECLIENTID,
2904  * NFSERR_STALESTATEID or NFSERR_BADSESSION is received.
2905  */
2906 void
2907 nfscl_initiate_recovery(struct nfsclclient *clp)
2908 {
2909
2910         if (clp == NULL)
2911                 return;
2912         NFSLOCKCLSTATE();
2913         clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
2914         NFSUNLOCKCLSTATE();
2915         wakeup((caddr_t)clp);
2916 }
2917
2918 /*
2919  * Dump out the state stuff for debugging.
2920  */
2921 void
2922 nfscl_dumpstate(struct nfsmount *nmp, int openowner, int opens,
2923     int lockowner, int locks)
2924 {
2925         struct nfsclclient *clp;
2926         struct nfsclowner *owp;
2927         struct nfsclopen *op;
2928         struct nfscllockowner *lp;
2929         struct nfscllock *lop;
2930         struct nfscldeleg *dp;
2931
2932         clp = nmp->nm_clp;
2933         if (clp == NULL) {
2934                 printf("nfscl dumpstate NULL clp\n");
2935                 return;
2936         }
2937         NFSLOCKCLSTATE();
2938         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
2939           LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
2940             if (openowner && !LIST_EMPTY(&owp->nfsow_open))
2941                 printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
2942                     owp->nfsow_owner[0], owp->nfsow_owner[1],
2943                     owp->nfsow_owner[2], owp->nfsow_owner[3],
2944                     owp->nfsow_seqid);
2945             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
2946                 if (opens)
2947                     printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
2948                         op->nfso_stateid.other[0], op->nfso_stateid.other[1],
2949                         op->nfso_stateid.other[2], op->nfso_opencnt,
2950                         op->nfso_fh[12]);
2951                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
2952                     if (lockowner)
2953                         printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
2954                             lp->nfsl_owner[0], lp->nfsl_owner[1],
2955                             lp->nfsl_owner[2], lp->nfsl_owner[3],
2956                             lp->nfsl_seqid,
2957                             lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
2958                             lp->nfsl_stateid.other[2]);
2959                     LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
2960                         if (locks)
2961 #ifdef __FreeBSD__
2962                             printf("lck typ=%d fst=%ju end=%ju\n",
2963                                 lop->nfslo_type, (intmax_t)lop->nfslo_first,
2964                                 (intmax_t)lop->nfslo_end);
2965 #else
2966                             printf("lck typ=%d fst=%qd end=%qd\n",
2967                                 lop->nfslo_type, lop->nfslo_first,
2968                                 lop->nfslo_end);
2969 #endif
2970                     }
2971                 }
2972             }
2973           }
2974         }
2975         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
2976             if (openowner && !LIST_EMPTY(&owp->nfsow_open))
2977                 printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
2978                     owp->nfsow_owner[0], owp->nfsow_owner[1],
2979                     owp->nfsow_owner[2], owp->nfsow_owner[3],
2980                     owp->nfsow_seqid);
2981             LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
2982                 if (opens)
2983                     printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
2984                         op->nfso_stateid.other[0], op->nfso_stateid.other[1],
2985                         op->nfso_stateid.other[2], op->nfso_opencnt,
2986                         op->nfso_fh[12]);
2987                 LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
2988                     if (lockowner)
2989                         printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
2990                             lp->nfsl_owner[0], lp->nfsl_owner[1],
2991                             lp->nfsl_owner[2], lp->nfsl_owner[3],
2992                             lp->nfsl_seqid,
2993                             lp->nfsl_stateid.other[0], lp->nfsl_stateid.other[1],
2994                             lp->nfsl_stateid.other[2]);
2995                     LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
2996                         if (locks)
2997 #ifdef __FreeBSD__
2998                             printf("lck typ=%d fst=%ju end=%ju\n",
2999                                 lop->nfslo_type, (intmax_t)lop->nfslo_first,
3000                                 (intmax_t)lop->nfslo_end);
3001 #else
3002                             printf("lck typ=%d fst=%qd end=%qd\n",
3003                                 lop->nfslo_type, lop->nfslo_first,
3004                                 lop->nfslo_end);
3005 #endif
3006                     }
3007                 }
3008             }
3009         }
3010         NFSUNLOCKCLSTATE();
3011 }
3012
3013 /*
3014  * Check for duplicate open owners and opens.
3015  * (Only used as a diagnostic aid.)
3016  */
3017 void
3018 nfscl_dupopen(vnode_t vp, int dupopens)
3019 {
3020         struct nfsclclient *clp;
3021         struct nfsclowner *owp, *owp2;
3022         struct nfsclopen *op, *op2;
3023         struct nfsfh *nfhp;
3024
3025         clp = VFSTONFS(vp->v_mount)->nm_clp;
3026         if (clp == NULL) {
3027                 printf("nfscl dupopen NULL clp\n");
3028                 return;
3029         }
3030         nfhp = VTONFS(vp)->n_fhp;
3031         NFSLOCKCLSTATE();
3032
3033         /*
3034          * First, search for duplicate owners.
3035          * These should never happen!
3036          */
3037         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3038             LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3039                 if (owp != owp2 &&
3040                     !NFSBCMP(owp->nfsow_owner, owp2->nfsow_owner,
3041                     NFSV4CL_LOCKNAMELEN)) {
3042                         NFSUNLOCKCLSTATE();
3043                         printf("DUP OWNER\n");
3044                         nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0, 0);
3045                         return;
3046                 }
3047             }
3048         }
3049
3050         /*
3051          * Now, search for duplicate stateids.
3052          * These shouldn't happen, either.
3053          */
3054         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3055             LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3056                 LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3057                     LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3058                         if (op != op2 &&
3059                             (op->nfso_stateid.other[0] != 0 ||
3060                              op->nfso_stateid.other[1] != 0 ||
3061                              op->nfso_stateid.other[2] != 0) &&
3062                             op->nfso_stateid.other[0] == op2->nfso_stateid.other[0] &&
3063                             op->nfso_stateid.other[1] == op2->nfso_stateid.other[1] &&
3064                             op->nfso_stateid.other[2] == op2->nfso_stateid.other[2]) {
3065                             NFSUNLOCKCLSTATE();
3066                             printf("DUP STATEID\n");
3067                             nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0, 0);
3068                             return;
3069                         }
3070                     }
3071                 }
3072             }
3073         }
3074
3075         /*
3076          * Now search for duplicate opens.
3077          * Duplicate opens for the same owner
3078          * should never occur. Other duplicates are
3079          * possible and are checked for if "dupopens"
3080          * is true.
3081          */
3082         LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
3083             LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
3084                 if (nfhp->nfh_len == op2->nfso_fhlen &&
3085                     !NFSBCMP(nfhp->nfh_fh, op2->nfso_fh, nfhp->nfh_len)) {
3086                     LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3087                         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3088                             if (op != op2 && nfhp->nfh_len == op->nfso_fhlen &&
3089                                 !NFSBCMP(nfhp->nfh_fh, op->nfso_fh, nfhp->nfh_len) &&
3090                                 (!NFSBCMP(op->nfso_own->nfsow_owner,
3091                                  op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN) ||
3092                                  dupopens)) {
3093                                 if (!NFSBCMP(op->nfso_own->nfsow_owner,
3094                                     op2->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
3095                                     NFSUNLOCKCLSTATE();
3096                                     printf("BADDUP OPEN\n");
3097                                 } else {
3098                                     NFSUNLOCKCLSTATE();
3099                                     printf("DUP OPEN\n");
3100                                 }
3101                                 nfscl_dumpstate(VFSTONFS(vp->v_mount), 1, 1, 0,
3102                                     0);
3103                                 return;
3104                             }
3105                         }
3106                     }
3107                 }
3108             }
3109         }
3110         NFSUNLOCKCLSTATE();
3111 }
3112
3113 /*
3114  * During close, find an open that needs to be dereferenced and
3115  * dereference it. If there are no more opens for this file,
3116  * log a message to that effect.
3117  * Opens aren't actually Close'd until VOP_INACTIVE() is performed
3118  * on the file's vnode.
3119  * This is the safe way, since it is difficult to identify
3120  * which open the close is for and I/O can be performed after the
3121  * close(2) system call when a file is mmap'd.
3122  * If it returns 0 for success, there will be a referenced
3123  * clp returned via clpp.
3124  */
3125 int
3126 nfscl_getclose(vnode_t vp, struct nfsclclient **clpp)
3127 {
3128         struct nfsclclient *clp;
3129         struct nfsclowner *owp;
3130         struct nfsclopen *op;
3131         struct nfscldeleg *dp;
3132         struct nfsfh *nfhp;
3133         int error, notdecr;
3134
3135         error = nfscl_getcl(vp->v_mount, NULL, NULL, 1, &clp);
3136         if (error)
3137                 return (error);
3138         *clpp = clp;
3139
3140         nfhp = VTONFS(vp)->n_fhp;
3141         notdecr = 1;
3142         NFSLOCKCLSTATE();
3143         /*
3144          * First, look for one under a delegation that was locally issued
3145          * and just decrement the opencnt for it. Since all my Opens against
3146          * the server are DENY_NONE, I don't see a problem with hanging
3147          * onto them. (It is much easier to use one of the extant Opens
3148          * that I already have on the server when a Delegation is recalled
3149          * than to do fresh Opens.) Someday, I might need to rethink this, but.
3150          */
3151         dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3152         if (dp != NULL) {
3153                 LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
3154                         op = LIST_FIRST(&owp->nfsow_open);
3155                         if (op != NULL) {
3156                                 /*
3157                                  * Since a delegation is for a file, there
3158                                  * should never be more than one open for
3159                                  * each openowner.
3160                                  */
3161                                 if (LIST_NEXT(op, nfso_list) != NULL)
3162                                         panic("nfscdeleg opens");
3163                                 if (notdecr && op->nfso_opencnt > 0) {
3164                                         notdecr = 0;
3165                                         op->nfso_opencnt--;
3166                                         break;
3167                                 }
3168                         }
3169                 }
3170         }
3171
3172         /* Now process the opens against the server. */
3173         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3174                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3175                         if (op->nfso_fhlen == nfhp->nfh_len &&
3176                             !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3177                             nfhp->nfh_len)) {
3178                                 /* Found an open, decrement cnt if possible */
3179                                 if (notdecr && op->nfso_opencnt > 0) {
3180                                         notdecr = 0;
3181                                         op->nfso_opencnt--;
3182                                 }
3183                                 /*
3184                                  * There are more opens, so just return.
3185                                  */
3186                                 if (op->nfso_opencnt > 0) {
3187                                         NFSUNLOCKCLSTATE();
3188                                         return (0);
3189                                 }
3190                         }
3191                 }
3192         }
3193         NFSUNLOCKCLSTATE();
3194         if (notdecr)
3195                 printf("nfscl: never fnd open\n");
3196         return (0);
3197 }
3198
3199 int
3200 nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p)
3201 {
3202         struct nfsclclient *clp;
3203         struct nfsclowner *owp, *nowp;
3204         struct nfsclopen *op;
3205         struct nfscldeleg *dp;
3206         struct nfsfh *nfhp;
3207         struct nfsclrecalllayout *recallp;
3208         int error;
3209
3210         error = nfscl_getcl(vp->v_mount, NULL, NULL, 1, &clp);
3211         if (error)
3212                 return (error);
3213         *clpp = clp;
3214
3215         nfhp = VTONFS(vp)->n_fhp;
3216         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
3217         NFSLOCKCLSTATE();
3218         /*
3219          * First get rid of the local Open structures, which should be no
3220          * longer in use.
3221          */
3222         dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
3223         if (dp != NULL) {
3224                 LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
3225                         op = LIST_FIRST(&owp->nfsow_open);
3226                         if (op != NULL) {
3227                                 KASSERT((op->nfso_opencnt == 0),
3228                                     ("nfscl: bad open cnt on deleg"));
3229                                 nfscl_freeopen(op, 1);
3230                         }
3231                         nfscl_freeopenowner(owp, 1);
3232                 }
3233         }
3234
3235         /* Return any layouts marked return on close. */
3236         nfscl_retoncloselayout(vp, clp, nfhp->nfh_fh, nfhp->nfh_len, &recallp);
3237
3238         /* Now process the opens against the server. */
3239 lookformore:
3240         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3241                 op = LIST_FIRST(&owp->nfsow_open);
3242                 while (op != NULL) {
3243                         if (op->nfso_fhlen == nfhp->nfh_len &&
3244                             !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
3245                             nfhp->nfh_len)) {
3246                                 /* Found an open, close it. */
3247 #ifdef DIAGNOSTIC
3248                                 KASSERT((op->nfso_opencnt == 0),
3249                                     ("nfscl: bad open cnt on server (%d)",
3250                                      op->nfso_opencnt));
3251 #endif
3252                                 NFSUNLOCKCLSTATE();
3253                                 nfsrpc_doclose(VFSTONFS(vp->v_mount), op, p);
3254                                 NFSLOCKCLSTATE();
3255                                 goto lookformore;
3256                         }
3257                         op = LIST_NEXT(op, nfso_list);
3258                 }
3259         }
3260         NFSUNLOCKCLSTATE();
3261         /*
3262          * recallp has been set NULL by nfscl_retoncloselayout() if it was
3263          * used by the function, but calling free() with a NULL pointer is ok.
3264          */
3265         free(recallp, M_NFSLAYRECALL);
3266         return (0);
3267 }
3268
3269 /*
3270  * Return all delegations on this client.
3271  * (Must be called with client sleep lock.)
3272  */
3273 static void
3274 nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p)
3275 {
3276         struct nfscldeleg *dp, *ndp;
3277         struct ucred *cred;
3278
3279         cred = newnfs_getcred();
3280         TAILQ_FOREACH_SAFE(dp, &clp->nfsc_deleg, nfsdl_list, ndp) {
3281                 nfscl_cleandeleg(dp);
3282                 (void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
3283                 nfscl_freedeleg(&clp->nfsc_deleg, dp);
3284         }
3285         NFSFREECRED(cred);
3286 }
3287
3288 /*
3289  * Do a callback RPC.
3290  */
3291 void
3292 nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
3293 {
3294         int clist, gotseq_ok, i, j, k, op, rcalls;
3295         u_int32_t *tl;
3296         struct nfsclclient *clp;
3297         struct nfscldeleg *dp = NULL;
3298         int numops, taglen = -1, error = 0, trunc __unused;
3299         u_int32_t minorvers = 0, retops = 0, *retopsp = NULL, *repp, cbident;
3300         u_char tag[NFSV4_SMALLSTR + 1], *tagstr;
3301         vnode_t vp = NULL;
3302         struct nfsnode *np;
3303         struct vattr va;
3304         struct nfsfh *nfhp;
3305         mount_t mp;
3306         nfsattrbit_t attrbits, rattrbits;
3307         nfsv4stateid_t stateid;
3308         uint32_t seqid, slotid = 0, highslot, cachethis __unused;
3309         uint8_t sessionid[NFSX_V4SESSIONID];
3310         struct mbuf *rep;
3311         struct nfscllayout *lyp;
3312         uint64_t filesid[2], len, off;
3313         int changed, gotone, laytype, recalltype;
3314         uint32_t iomode;
3315         struct nfsclrecalllayout *recallp = NULL;
3316         struct nfsclsession *tsep;
3317
3318         gotseq_ok = 0;
3319         nfsrvd_rephead(nd);
3320         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3321         taglen = fxdr_unsigned(int, *tl);
3322         if (taglen < 0) {
3323                 error = EBADRPC;
3324                 goto nfsmout;
3325         }
3326         if (taglen <= NFSV4_SMALLSTR)
3327                 tagstr = tag;
3328         else
3329                 tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK);
3330         error = nfsrv_mtostr(nd, tagstr, taglen);
3331         if (error) {
3332                 if (taglen > NFSV4_SMALLSTR)
3333                         free(tagstr, M_TEMP);
3334                 taglen = -1;
3335                 goto nfsmout;
3336         }
3337         (void) nfsm_strtom(nd, tag, taglen);
3338         if (taglen > NFSV4_SMALLSTR) {
3339                 free(tagstr, M_TEMP);
3340         }
3341         NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED);
3342         NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3343         minorvers = fxdr_unsigned(u_int32_t, *tl++);
3344         if (minorvers != NFSV4_MINORVERSION &&
3345             minorvers != NFSV41_MINORVERSION &&
3346             minorvers != NFSV42_MINORVERSION)
3347                 nd->nd_repstat = NFSERR_MINORVERMISMATCH;
3348         cbident = fxdr_unsigned(u_int32_t, *tl++);
3349         if (nd->nd_repstat)
3350                 numops = 0;
3351         else
3352                 numops = fxdr_unsigned(int, *tl);
3353         /*
3354          * Loop around doing the sub ops.
3355          */
3356         for (i = 0; i < numops; i++) {
3357                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3358                 NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED);
3359                 *repp++ = *tl;
3360                 op = fxdr_unsigned(int, *tl);
3361                 if (op < NFSV4OP_CBGETATTR ||
3362                    (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) ||
3363                    (op > NFSV4OP_CBNOTIFYDEVID &&
3364                     minorvers == NFSV41_MINORVERSION) ||
3365                    (op > NFSV4OP_CBOFFLOAD &&
3366                     minorvers == NFSV42_MINORVERSION)) {
3367                     nd->nd_repstat = NFSERR_OPILLEGAL;
3368                     *repp = nfscl_errmap(nd, minorvers);
3369                     retops++;
3370                     break;
3371                 }
3372                 nd->nd_procnum = op;
3373                 if (op < NFSV42_CBNOPS)
3374                         nfsstatsv1.cbrpccnt[nd->nd_procnum]++;
3375                 switch (op) {
3376                 case NFSV4OP_CBGETATTR:
3377                         NFSCL_DEBUG(4, "cbgetattr\n");
3378                         mp = NULL;
3379                         vp = NULL;
3380                         error = nfsm_getfh(nd, &nfhp);
3381                         if (!error)
3382                                 error = nfsrv_getattrbits(nd, &attrbits,
3383                                     NULL, NULL);
3384                         if (error == 0 && i == 0 &&
3385                             minorvers != NFSV4_MINORVERSION)
3386                                 error = NFSERR_OPNOTINSESS;
3387                         if (!error) {
3388                                 mp = nfscl_getmnt(minorvers, sessionid, cbident,
3389                                     &clp);
3390                                 if (mp == NULL)
3391                                         error = NFSERR_SERVERFAULT;
3392                         }
3393                         if (!error) {
3394                                 error = nfscl_ngetreopen(mp, nfhp->nfh_fh,
3395                                     nfhp->nfh_len, p, &np);
3396                                 if (!error)
3397                                         vp = NFSTOV(np);
3398                         }
3399                         if (!error) {
3400                                 NFSZERO_ATTRBIT(&rattrbits);
3401                                 NFSLOCKCLSTATE();
3402                                 dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3403                                     nfhp->nfh_len);
3404                                 if (dp != NULL) {
3405                                         if (NFSISSET_ATTRBIT(&attrbits,
3406                                             NFSATTRBIT_SIZE)) {
3407                                                 if (vp != NULL)
3408                                                         va.va_size = np->n_size;
3409                                                 else
3410                                                         va.va_size =
3411                                                             dp->nfsdl_size;
3412                                                 NFSSETBIT_ATTRBIT(&rattrbits,
3413                                                     NFSATTRBIT_SIZE);
3414                                         }
3415                                         if (NFSISSET_ATTRBIT(&attrbits,
3416                                             NFSATTRBIT_CHANGE)) {
3417                                                 va.va_filerev =
3418                                                     dp->nfsdl_change;
3419                                                 if (vp == NULL ||
3420                                                     (np->n_flag & NDELEGMOD))
3421                                                         va.va_filerev++;
3422                                                 NFSSETBIT_ATTRBIT(&rattrbits,
3423                                                     NFSATTRBIT_CHANGE);
3424                                         }
3425                                 } else
3426                                         error = NFSERR_SERVERFAULT;
3427                                 NFSUNLOCKCLSTATE();
3428                         }
3429                         if (vp != NULL)
3430                                 vrele(vp);
3431                         if (mp != NULL)
3432                                 vfs_unbusy(mp);
3433                         if (nfhp != NULL)
3434                                 free(nfhp, M_NFSFH);
3435                         if (!error)
3436                                 (void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va,
3437                                     NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0,
3438                                     (uint64_t)0, NULL);
3439                         break;
3440                 case NFSV4OP_CBRECALL:
3441                         NFSCL_DEBUG(4, "cbrecall\n");
3442                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
3443                             NFSX_UNSIGNED);
3444                         stateid.seqid = *tl++;
3445                         NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other,
3446                             NFSX_STATEIDOTHER);
3447                         tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
3448                         trunc = fxdr_unsigned(int, *tl);
3449                         error = nfsm_getfh(nd, &nfhp);
3450                         if (error == 0 && i == 0 &&
3451                             minorvers != NFSV4_MINORVERSION)
3452                                 error = NFSERR_OPNOTINSESS;
3453                         if (!error) {
3454                                 NFSLOCKCLSTATE();
3455                                 if (minorvers == NFSV4_MINORVERSION)
3456                                         clp = nfscl_getclnt(cbident);
3457                                 else
3458                                         clp = nfscl_getclntsess(sessionid);
3459                                 if (clp != NULL) {
3460                                         dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
3461                                             nfhp->nfh_len);
3462                                         if (dp != NULL && (dp->nfsdl_flags &
3463                                             NFSCLDL_DELEGRET) == 0) {
3464                                                 dp->nfsdl_flags |=
3465                                                     NFSCLDL_RECALL;
3466                                                 wakeup((caddr_t)clp);
3467                                         }
3468                                 } else {
3469                                         error = NFSERR_SERVERFAULT;
3470                                 }
3471                                 NFSUNLOCKCLSTATE();
3472                         }
3473                         if (nfhp != NULL)
3474                                 free(nfhp, M_NFSFH);
3475                         break;
3476                 case NFSV4OP_CBLAYOUTRECALL:
3477                         NFSCL_DEBUG(4, "cblayrec\n");
3478                         nfhp = NULL;
3479                         NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
3480                         laytype = fxdr_unsigned(int, *tl++);
3481                         iomode = fxdr_unsigned(uint32_t, *tl++);
3482                         if (newnfs_true == *tl++)
3483                                 changed = 1;
3484                         else
3485                                 changed = 0;
3486                         recalltype = fxdr_unsigned(int, *tl);
3487                         NFSCL_DEBUG(4, "layt=%d iom=%d ch=%d rectyp=%d\n",
3488                             laytype, iomode, changed, recalltype);
3489                         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL,
3490                             M_WAITOK);
3491                         if (laytype != NFSLAYOUT_NFSV4_1_FILES &&
3492                             laytype != NFSLAYOUT_FLEXFILE)
3493                                 error = NFSERR_NOMATCHLAYOUT;
3494                         else if (recalltype == NFSLAYOUTRETURN_FILE) {
3495                                 error = nfsm_getfh(nd, &nfhp);
3496                                 NFSCL_DEBUG(4, "retfile getfh=%d\n", error);
3497                                 if (error != 0)
3498                                         goto nfsmout;
3499                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER +
3500                                     NFSX_STATEID);
3501                                 off = fxdr_hyper(tl); tl += 2;
3502                                 len = fxdr_hyper(tl); tl += 2;
3503                                 stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
3504                                 NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
3505                                 if (minorvers == NFSV4_MINORVERSION)
3506                                         error = NFSERR_NOTSUPP;
3507                                 else if (i == 0)
3508                                         error = NFSERR_OPNOTINSESS;
3509                                 NFSCL_DEBUG(4, "off=%ju len=%ju sq=%u err=%d\n",
3510                                     (uintmax_t)off, (uintmax_t)len,
3511                                     stateid.seqid, error);
3512                                 if (error == 0) {
3513                                         NFSLOCKCLSTATE();
3514                                         clp = nfscl_getclntsess(sessionid);
3515                                         NFSCL_DEBUG(4, "cbly clp=%p\n", clp);
3516                                         if (clp != NULL) {
3517                                                 lyp = nfscl_findlayout(clp,
3518                                                     nfhp->nfh_fh,
3519                                                     nfhp->nfh_len);
3520                                                 NFSCL_DEBUG(4, "cblyp=%p\n",
3521                                                     lyp);
3522                                                 if (lyp != NULL &&
3523                                                     (lyp->nfsly_flags &
3524                                                      (NFSLY_FILES |
3525                                                       NFSLY_FLEXFILE)) != 0 &&
3526                                                     !NFSBCMP(stateid.other,
3527                                                     lyp->nfsly_stateid.other,
3528                                                     NFSX_STATEIDOTHER)) {
3529                                                         error =
3530                                                             nfscl_layoutrecall(
3531                                                             recalltype,
3532                                                             lyp, iomode, off,
3533                                                             len, stateid.seqid,
3534                                                             0, 0, NULL,
3535                                                             recallp);
3536                                                         if (error == 0 &&
3537                                                             stateid.seqid >
3538                                                             lyp->nfsly_stateid.seqid)
3539                                                                 lyp->nfsly_stateid.seqid =
3540                                                                     stateid.seqid;
3541                                                         recallp = NULL;
3542                                                         wakeup(clp);
3543                                                         NFSCL_DEBUG(4,
3544                                                             "aft layrcal=%d "
3545                                                             "layseqid=%d\n",
3546                                                             error,
3547                                                             lyp->nfsly_stateid.seqid);
3548                                                 } else
3549                                                         error =
3550                                                           NFSERR_NOMATCHLAYOUT;
3551                                         } else
3552                                                 error = NFSERR_NOMATCHLAYOUT;
3553                                         NFSUNLOCKCLSTATE();
3554                                 }
3555                                 free(nfhp, M_NFSFH);
3556                         } else if (recalltype == NFSLAYOUTRETURN_FSID) {
3557                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER);
3558                                 filesid[0] = fxdr_hyper(tl); tl += 2;
3559                                 filesid[1] = fxdr_hyper(tl); tl += 2;
3560                                 gotone = 0;
3561                                 NFSLOCKCLSTATE();
3562                                 clp = nfscl_getclntsess(sessionid);
3563                                 if (clp != NULL) {
3564                                         TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3565                                             nfsly_list) {
3566                                                 if (lyp->nfsly_filesid[0] ==
3567                                                     filesid[0] &&
3568                                                     lyp->nfsly_filesid[1] ==
3569                                                     filesid[1]) {
3570                                                         error =
3571                                                             nfscl_layoutrecall(
3572                                                             recalltype,
3573                                                             lyp, iomode, 0,
3574                                                             UINT64_MAX,
3575                                                             lyp->nfsly_stateid.seqid,
3576                                                             0, 0, NULL,
3577                                                             recallp);
3578                                                         recallp = NULL;
3579                                                         gotone = 1;
3580                                                 }
3581                                         }
3582                                         if (gotone != 0)
3583                                                 wakeup(clp);
3584                                         else
3585                                                 error = NFSERR_NOMATCHLAYOUT;
3586                                 } else
3587                                         error = NFSERR_NOMATCHLAYOUT;
3588                                 NFSUNLOCKCLSTATE();
3589                         } else if (recalltype == NFSLAYOUTRETURN_ALL) {
3590                                 gotone = 0;
3591                                 NFSLOCKCLSTATE();
3592                                 clp = nfscl_getclntsess(sessionid);
3593                                 if (clp != NULL) {
3594                                         TAILQ_FOREACH(lyp, &clp->nfsc_layout,
3595                                             nfsly_list) {
3596                                                 error = nfscl_layoutrecall(
3597                                                     recalltype, lyp, iomode, 0,
3598                                                     UINT64_MAX,
3599                                                     lyp->nfsly_stateid.seqid,
3600                                                     0, 0, NULL, recallp);
3601                                                 recallp = NULL;
3602                                                 gotone = 1;
3603                                         }
3604                                         if (gotone != 0)
3605                                                 wakeup(clp);
3606                                         else
3607                                                 error = NFSERR_NOMATCHLAYOUT;
3608                                 } else
3609                                         error = NFSERR_NOMATCHLAYOUT;
3610                                 NFSUNLOCKCLSTATE();
3611                         } else
3612                                 error = NFSERR_NOMATCHLAYOUT;
3613                         if (recallp != NULL) {
3614                                 free(recallp, M_NFSLAYRECALL);
3615                                 recallp = NULL;
3616                         }
3617                         break;
3618                 case NFSV4OP_CBSEQUENCE:
3619                         NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3620                             5 * NFSX_UNSIGNED);
3621                         bcopy(tl, sessionid, NFSX_V4SESSIONID);
3622                         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3623                         seqid = fxdr_unsigned(uint32_t, *tl++);
3624                         slotid = fxdr_unsigned(uint32_t, *tl++);
3625                         highslot = fxdr_unsigned(uint32_t, *tl++);
3626                         cachethis = *tl++;
3627                         /* Throw away the referring call stuff. */
3628                         clist = fxdr_unsigned(int, *tl);
3629                         for (j = 0; j < clist; j++) {
3630                                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
3631                                     NFSX_UNSIGNED);
3632                                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3633                                 rcalls = fxdr_unsigned(int, *tl);
3634                                 for (k = 0; k < rcalls; k++) {
3635                                         NFSM_DISSECT(tl, uint32_t *,
3636                                             2 * NFSX_UNSIGNED);
3637                                 }
3638                         }
3639                         NFSLOCKCLSTATE();
3640                         if (i == 0) {
3641                                 clp = nfscl_getclntsess(sessionid);
3642                                 if (clp == NULL)
3643                                         error = NFSERR_SERVERFAULT;
3644                         } else
3645                                 error = NFSERR_SEQUENCEPOS;
3646                         if (error == 0) {
3647                                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3648                                 error = nfsv4_seqsession(seqid, slotid,
3649                                     highslot, tsep->nfsess_cbslots, &rep,
3650                                     tsep->nfsess_backslots);
3651                         }
3652                         NFSUNLOCKCLSTATE();
3653                         if (error == 0 || error == NFSERR_REPLYFROMCACHE) {
3654                                 gotseq_ok = 1;
3655                                 if (rep != NULL) {
3656                                         /*
3657                                          * Handle a reply for a retried
3658                                          * callback.  The reply will be
3659                                          * re-inserted in the session cache
3660                                          * by the nfsv4_seqsess_cacherep() call
3661                                          * after out:
3662                                          */
3663                                         KASSERT(error == NFSERR_REPLYFROMCACHE,
3664                                             ("cbsequence: non-NULL rep"));
3665                                         NFSCL_DEBUG(4, "Got cbretry\n");
3666                                         m_freem(nd->nd_mreq);
3667                                         nd->nd_mreq = rep;
3668                                         rep = NULL;
3669                                         goto out;
3670                                 }
3671                                 NFSM_BUILD(tl, uint32_t *,
3672                                     NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED);
3673                                 bcopy(sessionid, tl, NFSX_V4SESSIONID);
3674                                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
3675                                 *tl++ = txdr_unsigned(seqid);
3676                                 *tl++ = txdr_unsigned(slotid);
3677                                 *tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1);
3678                                 *tl = txdr_unsigned(NFSV4_CBSLOTS - 1);
3679                         }
3680                         break;
3681                 default:
3682                         if (i == 0 && minorvers != NFSV4_MINORVERSION)
3683                                 error = NFSERR_OPNOTINSESS;
3684                         else {
3685                                 NFSCL_DEBUG(1, "unsupp callback %d\n", op);
3686                                 error = NFSERR_NOTSUPP;
3687                         }
3688                         break;
3689                 }
3690                 if (error) {
3691                         if (error == EBADRPC || error == NFSERR_BADXDR) {
3692                                 nd->nd_repstat = NFSERR_BADXDR;
3693                         } else {
3694                                 nd->nd_repstat = error;
3695                         }
3696                         error = 0;
3697                 }
3698                 retops++;
3699                 if (nd->nd_repstat) {
3700                         *repp = nfscl_errmap(nd, minorvers);
3701                         break;
3702                 } else
3703                         *repp = 0;      /* NFS4_OK */
3704         }
3705 nfsmout:
3706         if (recallp != NULL)
3707                 free(recallp, M_NFSLAYRECALL);
3708         if (error) {
3709                 if (error == EBADRPC || error == NFSERR_BADXDR)
3710                         nd->nd_repstat = NFSERR_BADXDR;
3711                 else
3712                         printf("nfsv4 comperr1=%d\n", error);
3713         }
3714         if (taglen == -1) {
3715                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3716                 *tl++ = 0;
3717                 *tl = 0;
3718         } else {
3719                 *retopsp = txdr_unsigned(retops);
3720         }
3721         *nd->nd_errp = nfscl_errmap(nd, minorvers);
3722 out:
3723         if (gotseq_ok != 0) {
3724                 rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
3725                 NFSLOCKCLSTATE();
3726                 clp = nfscl_getclntsess(sessionid);
3727                 if (clp != NULL) {
3728                         tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3729                         nfsv4_seqsess_cacherep(slotid, tsep->nfsess_cbslots,
3730                             NFSERR_OK, &rep);
3731                         NFSUNLOCKCLSTATE();
3732                 } else {
3733                         NFSUNLOCKCLSTATE();
3734                         m_freem(rep);
3735                 }
3736         }
3737 }
3738
3739 /*
3740  * Generate the next cbident value. Basically just increment a static value
3741  * and then check that it isn't already in the list, if it has wrapped around.
3742  */
3743 static u_int32_t
3744 nfscl_nextcbident(void)
3745 {
3746         struct nfsclclient *clp;
3747         int matched;
3748         static u_int32_t nextcbident = 0;
3749         static int haswrapped = 0;
3750
3751         nextcbident++;
3752         if (nextcbident == 0)
3753                 haswrapped = 1;
3754         if (haswrapped) {
3755                 /*
3756                  * Search the clientid list for one already using this cbident.
3757                  */
3758                 do {
3759                         matched = 0;
3760                         NFSLOCKCLSTATE();
3761                         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3762                                 if (clp->nfsc_cbident == nextcbident) {
3763                                         matched = 1;
3764                                         break;
3765                                 }
3766                         }
3767                         NFSUNLOCKCLSTATE();
3768                         if (matched == 1)
3769                                 nextcbident++;
3770                 } while (matched);
3771         }
3772         return (nextcbident);
3773 }
3774
3775 /*
3776  * Get the mount point related to a given cbident or session and busy it.
3777  */
3778 static mount_t
3779 nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident,
3780     struct nfsclclient **clpp)
3781 {
3782         struct nfsclclient *clp;
3783         mount_t mp;
3784         int error;
3785         struct nfsclsession *tsep;
3786
3787         *clpp = NULL;
3788         NFSLOCKCLSTATE();
3789         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3790                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3791                 if (minorvers == NFSV4_MINORVERSION) {
3792                         if (clp->nfsc_cbident == cbident)
3793                                 break;
3794                 } else if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3795                     NFSX_V4SESSIONID))
3796                         break;
3797         }
3798         if (clp == NULL) {
3799                 NFSUNLOCKCLSTATE();
3800                 return (NULL);
3801         }
3802         mp = clp->nfsc_nmp->nm_mountp;
3803         vfs_ref(mp);
3804         NFSUNLOCKCLSTATE();
3805         error = vfs_busy(mp, 0);
3806         vfs_rel(mp);
3807         if (error != 0)
3808                 return (NULL);
3809         *clpp = clp;
3810         return (mp);
3811 }
3812
3813 /*
3814  * Get the clientid pointer related to a given cbident.
3815  */
3816 static struct nfsclclient *
3817 nfscl_getclnt(u_int32_t cbident)
3818 {
3819         struct nfsclclient *clp;
3820
3821         LIST_FOREACH(clp, &nfsclhead, nfsc_list)
3822                 if (clp->nfsc_cbident == cbident)
3823                         break;
3824         return (clp);
3825 }
3826
3827 /*
3828  * Get the clientid pointer related to a given sessionid.
3829  */
3830 static struct nfsclclient *
3831 nfscl_getclntsess(uint8_t *sessionid)
3832 {
3833         struct nfsclclient *clp;
3834         struct nfsclsession *tsep;
3835
3836         LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
3837                 tsep = nfsmnt_mdssession(clp->nfsc_nmp);
3838                 if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
3839                     NFSX_V4SESSIONID))
3840                         break;
3841         }
3842         return (clp);
3843 }
3844
3845 /*
3846  * Search for a lock conflict locally on the client. A conflict occurs if
3847  * - not same owner and overlapping byte range and at least one of them is
3848  *   a write lock or this is an unlock.
3849  */
3850 static int
3851 nfscl_localconflict(struct nfsclclient *clp, u_int8_t *fhp, int fhlen,
3852     struct nfscllock *nlop, u_int8_t *own, struct nfscldeleg *dp,
3853     struct nfscllock **lopp)
3854 {
3855         struct nfsclowner *owp;
3856         struct nfsclopen *op;
3857         int ret;
3858
3859         if (dp != NULL) {
3860                 ret = nfscl_checkconflict(&dp->nfsdl_lock, nlop, own, lopp);
3861                 if (ret)
3862                         return (ret);
3863         }
3864         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
3865                 LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
3866                         if (op->nfso_fhlen == fhlen &&
3867                             !NFSBCMP(op->nfso_fh, fhp, fhlen)) {
3868                                 ret = nfscl_checkconflict(&op->nfso_lock, nlop,
3869                                     own, lopp);
3870                                 if (ret)
3871                                         return (ret);
3872                         }
3873                 }
3874         }
3875         return (0);
3876 }
3877
3878 static int
3879 nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop,
3880     u_int8_t *own, struct nfscllock **lopp)
3881 {
3882         struct nfscllockowner *lp;
3883         struct nfscllock *lop;
3884
3885         LIST_FOREACH(lp, lhp, nfsl_list) {
3886                 if (NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
3887                         LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
3888                                 if (lop->nfslo_first >= nlop->nfslo_end)
3889                                         break;
3890                                 if (lop->nfslo_end <= nlop->nfslo_first)
3891                                         continue;
3892                                 if (lop->nfslo_type == F_WRLCK ||
3893                                     nlop->nfslo_type == F_WRLCK ||
3894                                     nlop->nfslo_type == F_UNLCK) {
3895                                         if (lopp != NULL)
3896                                                 *lopp = lop;
3897                                         return (NFSERR_DENIED);
3898                                 }
3899                         }
3900                 }
3901         }
3902         return (0);
3903 }
3904
3905 /*
3906  * Check for a local conflicting lock.
3907  */
3908 int
3909 nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
3910     u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags)
3911 {
3912         struct nfscllock *lop, nlck;
3913         struct nfscldeleg *dp;
3914         struct nfsnode *np;
3915         u_int8_t own[NFSV4CL_LOCKNAMELEN];
3916         int error;
3917
3918         nlck.nfslo_type = fl->l_type;
3919         nlck.nfslo_first = off;
3920         if (len == NFS64BITSSET) {
3921                 nlck.nfslo_end = NFS64BITSSET;
3922         } else {
3923                 nlck.nfslo_end = off + len;
3924                 if (nlck.nfslo_end <= nlck.nfslo_first)
3925                         return (NFSERR_INVAL);
3926         }
3927         np = VTONFS(vp);
3928         nfscl_filllockowner(id, own, flags);
3929         NFSLOCKCLSTATE();
3930         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
3931         error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
3932             &nlck, own, dp, &lop);
3933         if (error != 0) {
3934                 fl->l_whence = SEEK_SET;
3935                 fl->l_start = lop->nfslo_first;
3936                 if (lop->nfslo_end == NFS64BITSSET)
3937                         fl->l_len = 0;
3938                 else
3939                         fl->l_len = lop->nfslo_end - lop->nfslo_first;
3940                 fl->l_pid = (pid_t)0;
3941                 fl->l_type = lop->nfslo_type;
3942                 error = -1;                     /* no RPC required */
3943         } else if (dp != NULL && ((dp->nfsdl_flags & NFSCLDL_WRITE) ||
3944             fl->l_type == F_RDLCK)) {
3945                 /*
3946                  * The delegation ensures that there isn't a conflicting
3947                  * lock on the server, so return -1 to indicate an RPC
3948                  * isn't required.
3949                  */
3950                 fl->l_type = F_UNLCK;
3951                 error = -1;
3952         }
3953         NFSUNLOCKCLSTATE();
3954         return (error);
3955 }
3956
3957 /*
3958  * Handle Recall of a delegation.
3959  * The clp must be exclusive locked when this is called.
3960  */
3961 static int
3962 nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp,
3963     struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p,
3964     int called_from_renewthread, vnode_t *vpp)
3965 {
3966         struct nfsclowner *owp, *lowp, *nowp;
3967         struct nfsclopen *op, *lop;
3968         struct nfscllockowner *lp;
3969         struct nfscllock *lckp;
3970         struct nfsnode *np;
3971         int error = 0, ret;
3972
3973         if (vp == NULL) {
3974                 KASSERT(vpp != NULL, ("nfscl_recalldeleg: vpp NULL"));
3975                 *vpp = NULL;
3976                 /*
3977                  * First, get a vnode for the file. This is needed to do RPCs.
3978                  */
3979                 ret = nfscl_ngetreopen(nmp->nm_mountp, dp->nfsdl_fh,
3980                     dp->nfsdl_fhlen, p, &np);
3981                 if (ret) {
3982                         /*
3983                          * File isn't open, so nothing to move over to the
3984                          * server.
3985                          */
3986                         return (0);
3987                 }
3988                 vp = NFSTOV(np);
3989                 *vpp = vp;
3990         } else {
3991                 np = VTONFS(vp);
3992         }
3993         dp->nfsdl_flags &= ~NFSCLDL_MODTIMESET;
3994
3995         /*
3996          * Ok, if it's a write delegation, flush data to the server, so
3997          * that close/open consistency is retained.
3998          */
3999         ret = 0;
4000         NFSLOCKNODE(np);
4001         if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) {
4002                 np->n_flag |= NDELEGRECALL;
4003                 NFSUNLOCKNODE(np);
4004                 ret = ncl_flush(vp, MNT_WAIT, p, 1, called_from_renewthread);
4005                 NFSLOCKNODE(np);
4006                 np->n_flag &= ~NDELEGRECALL;
4007         }
4008         NFSINVALATTRCACHE(np);
4009         NFSUNLOCKNODE(np);
4010         if (ret == EIO && called_from_renewthread != 0) {
4011                 /*
4012                  * If the flush failed with EIO for the renew thread,
4013                  * return now, so that the dirty buffer will be flushed
4014                  * later.
4015                  */
4016                 return (ret);
4017         }
4018
4019         /*
4020          * Now, for each openowner with opens issued locally, move them
4021          * over to state against the server.
4022          */
4023         LIST_FOREACH(lowp, &dp->nfsdl_owner, nfsow_list) {
4024                 lop = LIST_FIRST(&lowp->nfsow_open);
4025                 if (lop != NULL) {
4026                         if (LIST_NEXT(lop, nfso_list) != NULL)
4027                                 panic("nfsdlg mult opens");
4028                         /*
4029                          * Look for the same openowner against the server.
4030                          */
4031                         LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
4032                                 if (!NFSBCMP(lowp->nfsow_owner,
4033                                     owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) {
4034                                         newnfs_copycred(&dp->nfsdl_cred, cred);
4035                                         ret = nfscl_moveopen(vp, clp, nmp, lop,
4036                                             owp, dp, cred, p);
4037                                         if (ret == NFSERR_STALECLIENTID ||
4038                                             ret == NFSERR_STALEDONTRECOVER ||
4039                                             ret == NFSERR_BADSESSION)
4040                                                 return (ret);
4041                                         if (ret) {
4042                                                 nfscl_freeopen(lop, 1);
4043                                                 if (!error)
4044                                                         error = ret;
4045                                         }
4046                                         break;
4047                                 }
4048                         }
4049
4050                         /*
4051                          * If no openowner found, create one and get an open
4052                          * for it.
4053                          */
4054                         if (owp == NULL) {
4055                                 nowp = malloc(
4056                                     sizeof (struct nfsclowner), M_NFSCLOWNER,
4057                                     M_WAITOK);
4058                                 nfscl_newopen(clp, NULL, &owp, &nowp, &op, 
4059                                     NULL, lowp->nfsow_owner, dp->nfsdl_fh,
4060                                     dp->nfsdl_fhlen, NULL, NULL);
4061                                 newnfs_copycred(&dp->nfsdl_cred, cred);
4062                                 ret = nfscl_moveopen(vp, clp, nmp, lop,
4063                                     owp, dp, cred, p);
4064                                 if (ret) {
4065                                         nfscl_freeopenowner(owp, 0);
4066                                         if (ret == NFSERR_STALECLIENTID ||
4067                                             ret == NFSERR_STALEDONTRECOVER ||
4068                                             ret == NFSERR_BADSESSION)
4069                                                 return (ret);
4070                                         if (ret) {
4071                                                 nfscl_freeopen(lop, 1);
4072                                                 if (!error)
4073                                                         error = ret;
4074                                         }
4075                                 }
4076                         }
4077                 }
4078         }
4079
4080         /*
4081          * Now, get byte range locks for any locks done locally.
4082          */
4083         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4084                 LIST_FOREACH(lckp, &lp->nfsl_lock, nfslo_list) {
4085                         newnfs_copycred(&dp->nfsdl_cred, cred);
4086                         ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p);
4087                         if (ret == NFSERR_STALESTATEID ||
4088                             ret == NFSERR_STALEDONTRECOVER ||
4089                             ret == NFSERR_STALECLIENTID ||
4090                             ret == NFSERR_BADSESSION)
4091                                 return (ret);
4092                         if (ret && !error)
4093                                 error = ret;
4094                 }
4095         }
4096         return (error);
4097 }
4098
4099 /*
4100  * Move a locally issued open over to an owner on the state list.
4101  * SIDE EFFECT: If it needs to sleep (do an rpc), it unlocks clstate and
4102  * returns with it unlocked.
4103  */
4104 static int
4105 nfscl_moveopen(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4106     struct nfsclopen *lop, struct nfsclowner *owp, struct nfscldeleg *dp,
4107     struct ucred *cred, NFSPROC_T *p)
4108 {
4109         struct nfsclopen *op, *nop;
4110         struct nfscldeleg *ndp;
4111         struct nfsnode *np;
4112         int error = 0, newone;
4113
4114         /*
4115          * First, look for an appropriate open, If found, just increment the
4116          * opencnt in it.
4117          */
4118         LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
4119                 if ((op->nfso_mode & lop->nfso_mode) == lop->nfso_mode &&
4120                     op->nfso_fhlen == lop->nfso_fhlen &&
4121                     !NFSBCMP(op->nfso_fh, lop->nfso_fh, op->nfso_fhlen)) {
4122                         op->nfso_opencnt += lop->nfso_opencnt;
4123                         nfscl_freeopen(lop, 1);
4124                         return (0);
4125                 }
4126         }
4127
4128         /* No appropriate open, so we have to do one against the server. */
4129         np = VTONFS(vp);
4130         nop = malloc(sizeof (struct nfsclopen) +
4131             lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK);
4132         newone = 0;
4133         nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner,
4134             lop->nfso_fh, lop->nfso_fhlen, cred, &newone);
4135         ndp = dp;
4136         error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen,
4137             lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op,
4138             NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &ndp, 0, 0, cred, p);
4139         if (error) {
4140                 if (newone)
4141                         nfscl_freeopen(op, 0);
4142         } else {
4143                 op->nfso_mode |= lop->nfso_mode;
4144                 op->nfso_opencnt += lop->nfso_opencnt;
4145                 nfscl_freeopen(lop, 1);
4146         }
4147         if (nop != NULL)
4148                 free(nop, M_NFSCLOPEN);
4149         if (ndp != NULL) {
4150                 /*
4151                  * What should I do with the returned delegation, since the
4152                  * delegation is being recalled? For now, just printf and
4153                  * through it away.
4154                  */
4155                 printf("Moveopen returned deleg\n");
4156                 free(ndp, M_NFSCLDELEG);
4157         }
4158         return (error);
4159 }
4160
4161 /*
4162  * Recall all delegations on this client.
4163  */
4164 static void
4165 nfscl_totalrecall(struct nfsclclient *clp)
4166 {
4167         struct nfscldeleg *dp;
4168
4169         TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
4170                 if ((dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0)
4171                         dp->nfsdl_flags |= NFSCLDL_RECALL;
4172         }
4173 }
4174
4175 /*
4176  * Relock byte ranges. Called for delegation recall and state expiry.
4177  */
4178 static int
4179 nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
4180     struct nfscllockowner *lp, struct nfscllock *lop, struct ucred *cred,
4181     NFSPROC_T *p)
4182 {
4183         struct nfscllockowner *nlp;
4184         struct nfsfh *nfhp;
4185         u_int64_t off, len;
4186         int error, newone, donelocally;
4187
4188         off = lop->nfslo_first;
4189         len = lop->nfslo_end - lop->nfslo_first;
4190         error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p,
4191             clp, 1, NULL, lp->nfsl_lockflags, lp->nfsl_owner,
4192             lp->nfsl_openowner, &nlp, &newone, &donelocally);
4193         if (error || donelocally)
4194                 return (error);
4195         nfhp = VTONFS(vp)->n_fhp;
4196         error = nfscl_trylock(nmp, vp, nfhp->nfh_fh,
4197             nfhp->nfh_len, nlp, newone, 0, off,
4198             len, lop->nfslo_type, cred, p);
4199         if (error)
4200                 nfscl_freelockowner(nlp, 0);
4201         return (error);
4202 }
4203
4204 /*
4205  * Called to re-open a file. Basically get a vnode for the file handle
4206  * and then call nfsrpc_openrpc() to do the rest.
4207  */
4208 static int
4209 nfsrpc_reopen(struct nfsmount *nmp, u_int8_t *fhp, int fhlen,
4210     u_int32_t mode, struct nfsclopen *op, struct nfscldeleg **dpp,
4211     struct ucred *cred, NFSPROC_T *p)
4212 {
4213         struct nfsnode *np;
4214         vnode_t vp;
4215         int error;
4216
4217         error = nfscl_ngetreopen(nmp->nm_mountp, fhp, fhlen, p, &np);
4218         if (error)
4219                 return (error);
4220         vp = NFSTOV(np);
4221         if (np->n_v4 != NULL) {
4222                 error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data,
4223                     np->n_v4->n4_fhlen, fhp, fhlen, mode, op,
4224                     NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, dpp, 0, 0,
4225                     cred, p);
4226         } else {
4227                 error = EINVAL;
4228         }
4229         vrele(vp);
4230         return (error);
4231 }
4232
4233 /*
4234  * Try an open against the server. Just call nfsrpc_openrpc(), retrying while
4235  * NFSERR_DELAY. Also, try system credentials, if the passed in credentials
4236  * fail.
4237  */
4238 static int
4239 nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
4240     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
4241     u_int8_t *name, int namelen, struct nfscldeleg **ndpp,
4242     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p)
4243 {
4244         int error;
4245
4246         do {
4247                 error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen,
4248                     mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p,
4249                     0, 0);
4250                 if (error == NFSERR_DELAY)
4251                         (void) nfs_catnap(PZERO, error, "nfstryop");
4252         } while (error == NFSERR_DELAY);
4253         if (error == EAUTH || error == EACCES) {
4254                 /* Try again using system credentials */
4255                 newnfs_setroot(cred);
4256                 do {
4257                     error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp,
4258                         newfhlen, mode, op, name, namelen, ndpp, reclaim,
4259                         delegtype, cred, p, 1, 0);
4260                     if (error == NFSERR_DELAY)
4261                         (void) nfs_catnap(PZERO, error, "nfstryop");
4262                 } while (error == NFSERR_DELAY);
4263         }
4264         return (error);
4265 }
4266
4267 /*
4268  * Try a byte range lock. Just loop on nfsrpc_lock() while it returns
4269  * NFSERR_DELAY. Also, retry with system credentials, if the provided
4270  * cred don't work.
4271  */
4272 static int
4273 nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp,
4274     int fhlen, struct nfscllockowner *nlp, int newone, int reclaim,
4275     u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p)
4276 {
4277         struct nfsrv_descript nfsd, *nd = &nfsd;
4278         int error;
4279
4280         do {
4281                 error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone,
4282                     reclaim, off, len, type, cred, p, 0);
4283                 if (!error && nd->nd_repstat == NFSERR_DELAY)
4284                         (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4285                             "nfstrylck");
4286         } while (!error && nd->nd_repstat == NFSERR_DELAY);
4287         if (!error)
4288                 error = nd->nd_repstat;
4289         if (error == EAUTH || error == EACCES) {
4290                 /* Try again using root credentials */
4291                 newnfs_setroot(cred);
4292                 do {
4293                         error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp,
4294                             newone, reclaim, off, len, type, cred, p, 1);
4295                         if (!error && nd->nd_repstat == NFSERR_DELAY)
4296                                 (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4297                                     "nfstrylck");
4298                 } while (!error && nd->nd_repstat == NFSERR_DELAY);
4299                 if (!error)
4300                         error = nd->nd_repstat;
4301         }
4302         return (error);
4303 }
4304
4305 /*
4306  * Try a delegreturn against the server. Just call nfsrpc_delegreturn(),
4307  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4308  * credentials fail.
4309  */
4310 static int
4311 nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred,
4312     struct nfsmount *nmp, NFSPROC_T *p)
4313 {
4314         int error;
4315
4316         do {
4317                 error = nfsrpc_delegreturn(dp, cred, nmp, p, 0);
4318                 if (error == NFSERR_DELAY)
4319                         (void) nfs_catnap(PZERO, error, "nfstrydp");
4320         } while (error == NFSERR_DELAY);
4321         if (error == EAUTH || error == EACCES) {
4322                 /* Try again using system credentials */
4323                 newnfs_setroot(cred);
4324                 do {
4325                         error = nfsrpc_delegreturn(dp, cred, nmp, p, 1);
4326                         if (error == NFSERR_DELAY)
4327                                 (void) nfs_catnap(PZERO, error, "nfstrydp");
4328                 } while (error == NFSERR_DELAY);
4329         }
4330         return (error);
4331 }
4332
4333 /*
4334  * Try a close against the server. Just call nfsrpc_closerpc(),
4335  * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
4336  * credentials fail.
4337  */
4338 int
4339 nfscl_tryclose(struct nfsclopen *op, struct ucred *cred,
4340     struct nfsmount *nmp, NFSPROC_T *p)
4341 {
4342         struct nfsrv_descript nfsd, *nd = &nfsd;
4343         int error;
4344
4345         do {
4346                 error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0);
4347                 if (error == NFSERR_DELAY)
4348                         (void) nfs_catnap(PZERO, error, "nfstrycl");
4349         } while (error == NFSERR_DELAY);
4350         if (error == EAUTH || error == EACCES) {
4351                 /* Try again using system credentials */
4352                 newnfs_setroot(cred);
4353                 do {
4354                         error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1);
4355                         if (error == NFSERR_DELAY)
4356                                 (void) nfs_catnap(PZERO, error, "nfstrycl");
4357                 } while (error == NFSERR_DELAY);
4358         }
4359         return (error);
4360 }
4361
4362 /*
4363  * Decide if a delegation on a file permits close without flushing writes
4364  * to the server. This might be a big performance win in some environments.
4365  * (Not useful until the client does caching on local stable storage.)
4366  */
4367 int
4368 nfscl_mustflush(vnode_t vp)
4369 {
4370         struct nfsclclient *clp;
4371         struct nfscldeleg *dp;
4372         struct nfsnode *np;
4373         struct nfsmount *nmp;
4374
4375         np = VTONFS(vp);
4376         nmp = VFSTONFS(vp->v_mount);
4377         if (!NFSHASNFSV4(nmp))
4378                 return (1);
4379         NFSLOCKCLSTATE();
4380         clp = nfscl_findcl(nmp);
4381         if (clp == NULL) {
4382                 NFSUNLOCKCLSTATE();
4383                 return (1);
4384         }
4385         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4386         if (dp != NULL && (dp->nfsdl_flags &
4387             (NFSCLDL_WRITE | NFSCLDL_RECALL | NFSCLDL_DELEGRET)) ==
4388              NFSCLDL_WRITE &&
4389             (dp->nfsdl_sizelimit >= np->n_size ||
4390              !NFSHASSTRICT3530(nmp))) {
4391                 NFSUNLOCKCLSTATE();
4392                 return (0);
4393         }
4394         NFSUNLOCKCLSTATE();
4395         return (1);
4396 }
4397
4398 /*
4399  * See if a (write) delegation exists for this file.
4400  */
4401 int
4402 nfscl_nodeleg(vnode_t vp, int writedeleg)
4403 {
4404         struct nfsclclient *clp;
4405         struct nfscldeleg *dp;
4406         struct nfsnode *np;
4407         struct nfsmount *nmp;
4408
4409         np = VTONFS(vp);
4410         nmp = VFSTONFS(vp->v_mount);
4411         if (!NFSHASNFSV4(nmp))
4412                 return (1);
4413         NFSLOCKCLSTATE();
4414         clp = nfscl_findcl(nmp);
4415         if (clp == NULL) {
4416                 NFSUNLOCKCLSTATE();
4417                 return (1);
4418         }
4419         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4420         if (dp != NULL &&
4421             (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0 &&
4422             (writedeleg == 0 || (dp->nfsdl_flags & NFSCLDL_WRITE) ==
4423              NFSCLDL_WRITE)) {
4424                 NFSUNLOCKCLSTATE();
4425                 return (0);
4426         }
4427         NFSUNLOCKCLSTATE();
4428         return (1);
4429 }
4430
4431 /*
4432  * Look for an associated delegation that should be DelegReturned.
4433  */
4434 int
4435 nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp)
4436 {
4437         struct nfsclclient *clp;
4438         struct nfscldeleg *dp;
4439         struct nfsclowner *owp;
4440         struct nfscllockowner *lp;
4441         struct nfsmount *nmp;
4442         struct ucred *cred;
4443         struct nfsnode *np;
4444         int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4445
4446         nmp = VFSTONFS(vp->v_mount);
4447         np = VTONFS(vp);
4448         NFSLOCKCLSTATE();
4449         /*
4450          * Loop around waiting for:
4451          * - outstanding I/O operations on delegations to complete
4452          * - for a delegation on vp that has state, lock the client and
4453          *   do a recall
4454          * - return delegation with no state
4455          */
4456         while (1) {
4457                 clp = nfscl_findcl(nmp);
4458                 if (clp == NULL) {
4459                         NFSUNLOCKCLSTATE();
4460                         return (retcnt);
4461                 }
4462                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4463                     np->n_fhp->nfh_len);
4464                 if (dp != NULL) {
4465                     /*
4466                      * Wait for outstanding I/O ops to be done.
4467                      */
4468                     if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4469                         if (igotlock) {
4470                             nfsv4_unlock(&clp->nfsc_lock, 0);
4471                             igotlock = 0;
4472                         }
4473                         dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4474                         (void) nfsmsleep(&dp->nfsdl_rwlock,
4475                             NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4476                         continue;
4477                     }
4478                     needsrecall = 0;
4479                     LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4480                         if (!LIST_EMPTY(&owp->nfsow_open)) {
4481                             needsrecall = 1;
4482                             break;
4483                         }
4484                     }
4485                     if (!needsrecall) {
4486                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4487                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4488                                 needsrecall = 1;
4489                                 break;
4490                             }
4491                         }
4492                     }
4493                     if (needsrecall && !triedrecall) {
4494                         dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4495                         islept = 0;
4496                         while (!igotlock) {
4497                             igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4498                                 &islept, NFSCLSTATEMUTEXPTR, NULL);
4499                             if (islept)
4500                                 break;
4501                         }
4502                         if (islept)
4503                             continue;
4504                         NFSUNLOCKCLSTATE();
4505                         cred = newnfs_getcred();
4506                         newnfs_copycred(&dp->nfsdl_cred, cred);
4507                         nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0, NULL);
4508                         NFSFREECRED(cred);
4509                         triedrecall = 1;
4510                         NFSLOCKCLSTATE();
4511                         nfsv4_unlock(&clp->nfsc_lock, 0);
4512                         igotlock = 0;
4513                         continue;
4514                     }
4515                     *stp = dp->nfsdl_stateid;
4516                     retcnt = 1;
4517                     nfscl_cleandeleg(dp);
4518                     nfscl_freedeleg(&clp->nfsc_deleg, dp);
4519                 }
4520                 if (igotlock)
4521                     nfsv4_unlock(&clp->nfsc_lock, 0);
4522                 NFSUNLOCKCLSTATE();
4523                 return (retcnt);
4524         }
4525 }
4526
4527 /*
4528  * Look for associated delegation(s) that should be DelegReturned.
4529  */
4530 int
4531 nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp,
4532     nfsv4stateid_t *tstp, int *gottdp, NFSPROC_T *p)
4533 {
4534         struct nfsclclient *clp;
4535         struct nfscldeleg *dp;
4536         struct nfsclowner *owp;
4537         struct nfscllockowner *lp;
4538         struct nfsmount *nmp;
4539         struct ucred *cred;
4540         struct nfsnode *np;
4541         int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept;
4542
4543         nmp = VFSTONFS(fvp->v_mount);
4544         *gotfdp = 0;
4545         *gottdp = 0;
4546         NFSLOCKCLSTATE();
4547         /*
4548          * Loop around waiting for:
4549          * - outstanding I/O operations on delegations to complete
4550          * - for a delegation on fvp that has state, lock the client and
4551          *   do a recall
4552          * - return delegation(s) with no state.
4553          */
4554         while (1) {
4555                 clp = nfscl_findcl(nmp);
4556                 if (clp == NULL) {
4557                         NFSUNLOCKCLSTATE();
4558                         return (retcnt);
4559                 }
4560                 np = VTONFS(fvp);
4561                 dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4562                     np->n_fhp->nfh_len);
4563                 if (dp != NULL && *gotfdp == 0) {
4564                     /*
4565                      * Wait for outstanding I/O ops to be done.
4566                      */
4567                     if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4568                         if (igotlock) {
4569                             nfsv4_unlock(&clp->nfsc_lock, 0);
4570                             igotlock = 0;
4571                         }
4572                         dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4573                         (void) nfsmsleep(&dp->nfsdl_rwlock,
4574                             NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4575                         continue;
4576                     }
4577                     needsrecall = 0;
4578                     LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4579                         if (!LIST_EMPTY(&owp->nfsow_open)) {
4580                             needsrecall = 1;
4581                             break;
4582                         }
4583                     }
4584                     if (!needsrecall) {
4585                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4586                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4587                                 needsrecall = 1;
4588                                 break;
4589                             }
4590                         }
4591                     }
4592                     if (needsrecall && !triedrecall) {
4593                         dp->nfsdl_flags |= NFSCLDL_DELEGRET;
4594                         islept = 0;
4595                         while (!igotlock) {
4596                             igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
4597                                 &islept, NFSCLSTATEMUTEXPTR, NULL);
4598                             if (islept)
4599                                 break;
4600                         }
4601                         if (islept)
4602                             continue;
4603                         NFSUNLOCKCLSTATE();
4604                         cred = newnfs_getcred();
4605                         newnfs_copycred(&dp->nfsdl_cred, cred);
4606                         nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0, NULL);
4607                         NFSFREECRED(cred);
4608                         triedrecall = 1;
4609                         NFSLOCKCLSTATE();
4610                         nfsv4_unlock(&clp->nfsc_lock, 0);
4611                         igotlock = 0;
4612                         continue;
4613                     }
4614                     *fstp = dp->nfsdl_stateid;
4615                     retcnt++;
4616                     *gotfdp = 1;
4617                     nfscl_cleandeleg(dp);
4618                     nfscl_freedeleg(&clp->nfsc_deleg, dp);
4619                 }
4620                 if (igotlock) {
4621                     nfsv4_unlock(&clp->nfsc_lock, 0);
4622                     igotlock = 0;
4623                 }
4624                 if (tvp != NULL) {
4625                     np = VTONFS(tvp);
4626                     dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh,
4627                         np->n_fhp->nfh_len);
4628                     if (dp != NULL && *gottdp == 0) {
4629                         /*
4630                          * Wait for outstanding I/O ops to be done.
4631                          */
4632                         if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
4633                             dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED;
4634                             (void) nfsmsleep(&dp->nfsdl_rwlock,
4635                                 NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL);
4636                             continue;
4637                         }
4638                         LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
4639                             if (!LIST_EMPTY(&owp->nfsow_open)) {
4640                                 NFSUNLOCKCLSTATE();
4641                                 return (retcnt);
4642                             }
4643                         }
4644                         LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
4645                             if (!LIST_EMPTY(&lp->nfsl_lock)) {
4646                                 NFSUNLOCKCLSTATE();
4647                                 return (retcnt);
4648                             }
4649                         }
4650                         *tstp = dp->nfsdl_stateid;
4651                         retcnt++;
4652                         *gottdp = 1;
4653                         nfscl_cleandeleg(dp);
4654                         nfscl_freedeleg(&clp->nfsc_deleg, dp);
4655                     }
4656                 }
4657                 NFSUNLOCKCLSTATE();
4658                 return (retcnt);
4659         }
4660 }
4661
4662 /*
4663  * Get a reference on the clientid associated with the mount point.
4664  * Return 1 if success, 0 otherwise.
4665  */
4666 int
4667 nfscl_getref(struct nfsmount *nmp)
4668 {
4669         struct nfsclclient *clp;
4670
4671         NFSLOCKCLSTATE();
4672         clp = nfscl_findcl(nmp);
4673         if (clp == NULL) {
4674                 NFSUNLOCKCLSTATE();
4675                 return (0);
4676         }
4677         nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, NULL);
4678         NFSUNLOCKCLSTATE();
4679         return (1);
4680 }
4681
4682 /*
4683  * Release a reference on a clientid acquired with the above call.
4684  */
4685 void
4686 nfscl_relref(struct nfsmount *nmp)
4687 {
4688         struct nfsclclient *clp;
4689
4690         NFSLOCKCLSTATE();
4691         clp = nfscl_findcl(nmp);
4692         if (clp == NULL) {
4693                 NFSUNLOCKCLSTATE();
4694                 return;
4695         }
4696         nfsv4_relref(&clp->nfsc_lock);
4697         NFSUNLOCKCLSTATE();
4698 }
4699
4700 /*
4701  * Save the size attribute in the delegation, since the nfsnode
4702  * is going away.
4703  */
4704 void
4705 nfscl_reclaimnode(vnode_t vp)
4706 {
4707         struct nfsclclient *clp;
4708         struct nfscldeleg *dp;
4709         struct nfsnode *np = VTONFS(vp);
4710         struct nfsmount *nmp;
4711
4712         nmp = VFSTONFS(vp->v_mount);
4713         if (!NFSHASNFSV4(nmp))
4714                 return;
4715         NFSLOCKCLSTATE();
4716         clp = nfscl_findcl(nmp);
4717         if (clp == NULL) {
4718                 NFSUNLOCKCLSTATE();
4719                 return;
4720         }
4721         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4722         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4723                 dp->nfsdl_size = np->n_size;
4724         NFSUNLOCKCLSTATE();
4725 }
4726
4727 /*
4728  * Get the saved size attribute in the delegation, since it is a
4729  * newly allocated nfsnode.
4730  */
4731 void
4732 nfscl_newnode(vnode_t vp)
4733 {
4734         struct nfsclclient *clp;
4735         struct nfscldeleg *dp;
4736         struct nfsnode *np = VTONFS(vp);
4737         struct nfsmount *nmp;
4738
4739         nmp = VFSTONFS(vp->v_mount);
4740         if (!NFSHASNFSV4(nmp))
4741                 return;
4742         NFSLOCKCLSTATE();
4743         clp = nfscl_findcl(nmp);
4744         if (clp == NULL) {
4745                 NFSUNLOCKCLSTATE();
4746                 return;
4747         }
4748         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4749         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
4750                 np->n_size = dp->nfsdl_size;
4751         NFSUNLOCKCLSTATE();
4752 }
4753
4754 /*
4755  * If there is a valid write delegation for this file, set the modtime
4756  * to the local clock time.
4757  */
4758 void
4759 nfscl_delegmodtime(vnode_t vp)
4760 {
4761         struct nfsclclient *clp;
4762         struct nfscldeleg *dp;
4763         struct nfsnode *np = VTONFS(vp);
4764         struct nfsmount *nmp;
4765
4766         nmp = VFSTONFS(vp->v_mount);
4767         if (!NFSHASNFSV4(nmp))
4768                 return;
4769         NFSLOCKCLSTATE();
4770         clp = nfscl_findcl(nmp);
4771         if (clp == NULL) {
4772                 NFSUNLOCKCLSTATE();
4773                 return;
4774         }
4775         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4776         if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) {
4777                 nanotime(&dp->nfsdl_modtime);
4778                 dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
4779         }
4780         NFSUNLOCKCLSTATE();
4781 }
4782
4783 /*
4784  * If there is a valid write delegation for this file with a modtime set,
4785  * put that modtime in mtime.
4786  */
4787 void
4788 nfscl_deleggetmodtime(vnode_t vp, struct timespec *mtime)
4789 {
4790         struct nfsclclient *clp;
4791         struct nfscldeleg *dp;
4792         struct nfsnode *np = VTONFS(vp);
4793         struct nfsmount *nmp;
4794
4795         nmp = VFSTONFS(vp->v_mount);
4796         if (!NFSHASNFSV4(nmp))
4797                 return;
4798         NFSLOCKCLSTATE();
4799         clp = nfscl_findcl(nmp);
4800         if (clp == NULL) {
4801                 NFSUNLOCKCLSTATE();
4802                 return;
4803         }
4804         dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
4805         if (dp != NULL &&
4806             (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) ==
4807             (NFSCLDL_WRITE | NFSCLDL_MODTIMESET))
4808                 *mtime = dp->nfsdl_modtime;
4809         NFSUNLOCKCLSTATE();
4810 }
4811
4812 static int
4813 nfscl_errmap(struct nfsrv_descript *nd, u_int32_t minorvers)
4814 {
4815         short *defaulterrp, *errp;
4816
4817         if (!nd->nd_repstat)
4818                 return (0);
4819         if (nd->nd_procnum == NFSPROC_NOOP)
4820                 return (txdr_unsigned(nd->nd_repstat & 0xffff));
4821         if (nd->nd_repstat == EBADRPC)
4822                 return (txdr_unsigned(NFSERR_BADXDR));
4823         if (nd->nd_repstat == NFSERR_MINORVERMISMATCH ||
4824             nd->nd_repstat == NFSERR_OPILLEGAL)
4825                 return (txdr_unsigned(nd->nd_repstat));
4826         if (nd->nd_repstat >= NFSERR_BADIOMODE && nd->nd_repstat < 20000 &&
4827             minorvers > NFSV4_MINORVERSION) {
4828                 /* NFSv4.n error. */
4829                 return (txdr_unsigned(nd->nd_repstat));
4830         }
4831         if (nd->nd_procnum < NFSV4OP_CBNOPS)
4832                 errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum];
4833         else
4834                 return (txdr_unsigned(nd->nd_repstat));
4835         while (*++errp)
4836                 if (*errp == (short)nd->nd_repstat)
4837                         return (txdr_unsigned(nd->nd_repstat));
4838         return (txdr_unsigned(*defaulterrp));
4839 }
4840
4841 /*
4842  * Called to find/add a layout to a client.
4843  * This function returns the layout with a refcnt (shared lock) upon
4844  * success (returns 0) or with no lock/refcnt on the layout when an
4845  * error is returned.
4846  * If a layout is passed in via lypp, it is locked (exclusively locked).
4847  */
4848 int
4849 nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen,
4850     nfsv4stateid_t *stateidp, int layouttype, int retonclose,
4851     struct nfsclflayouthead *fhlp, struct nfscllayout **lypp,
4852     struct ucred *cred, NFSPROC_T *p)
4853 {
4854         struct nfsclclient *clp;
4855         struct nfscllayout *lyp, *tlyp;
4856         struct nfsclflayout *flp;
4857         struct nfsnode *np = VTONFS(vp);
4858         mount_t mp;
4859         int layout_passed_in;
4860
4861         mp = nmp->nm_mountp;
4862         layout_passed_in = 1;
4863         tlyp = NULL;
4864         lyp = *lypp;
4865         if (lyp == NULL) {
4866                 layout_passed_in = 0;
4867                 tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT,
4868                     M_WAITOK | M_ZERO);
4869         }
4870
4871         NFSLOCKCLSTATE();
4872         clp = nmp->nm_clp;
4873         if (clp == NULL) {
4874                 if (layout_passed_in != 0)
4875                         nfsv4_unlock(&lyp->nfsly_lock, 0);
4876                 NFSUNLOCKCLSTATE();
4877                 if (tlyp != NULL)
4878                         free(tlyp, M_NFSLAYOUT);
4879                 return (EPERM);
4880         }
4881         if (lyp == NULL) {
4882                 /*
4883                  * Although no lyp was passed in, another thread might have
4884                  * allocated one. If one is found, just increment it's ref
4885                  * count and return it.
4886                  */
4887                 lyp = nfscl_findlayout(clp, fhp, fhlen);
4888                 if (lyp == NULL) {
4889                         lyp = tlyp;
4890                         tlyp = NULL;
4891                         lyp->nfsly_stateid.seqid = stateidp->seqid;
4892                         lyp->nfsly_stateid.other[0] = stateidp->other[0];
4893                         lyp->nfsly_stateid.other[1] = stateidp->other[1];
4894                         lyp->nfsly_stateid.other[2] = stateidp->other[2];
4895                         lyp->nfsly_lastbyte = 0;
4896                         LIST_INIT(&lyp->nfsly_flayread);
4897                         LIST_INIT(&lyp->nfsly_flayrw);
4898                         LIST_INIT(&lyp->nfsly_recall);
4899                         lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0];
4900                         lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1];
4901                         lyp->nfsly_clp = clp;
4902                         if (layouttype == NFSLAYOUT_FLEXFILE)
4903                                 lyp->nfsly_flags = NFSLY_FLEXFILE;
4904                         else
4905                                 lyp->nfsly_flags = NFSLY_FILES;
4906                         if (retonclose != 0)
4907                                 lyp->nfsly_flags |= NFSLY_RETONCLOSE;
4908                         lyp->nfsly_fhlen = fhlen;
4909                         NFSBCOPY(fhp, lyp->nfsly_fh, fhlen);
4910                         TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
4911                         LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp,
4912                             nfsly_hash);
4913                         lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
4914                         nfscl_layoutcnt++;
4915                 } else {
4916                         if (retonclose != 0)
4917                                 lyp->nfsly_flags |= NFSLY_RETONCLOSE;
4918                         if (stateidp->seqid > lyp->nfsly_stateid.seqid)
4919                                 lyp->nfsly_stateid.seqid = stateidp->seqid;
4920                         TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
4921                         TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
4922                         lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
4923                 }
4924                 nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
4925                 if (NFSCL_FORCEDISM(mp)) {
4926                         NFSUNLOCKCLSTATE();
4927                         if (tlyp != NULL)
4928                                 free(tlyp, M_NFSLAYOUT);
4929                         return (EPERM);
4930                 }
4931                 *lypp = lyp;
4932         } else if (stateidp->seqid > lyp->nfsly_stateid.seqid)
4933                 lyp->nfsly_stateid.seqid = stateidp->seqid;
4934
4935         /* Merge the new list of File Layouts into the list. */
4936         flp = LIST_FIRST(fhlp);
4937         if (flp != NULL) {
4938                 if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ)
4939                         nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp);
4940                 else
4941                         nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp);
4942         }
4943         if (layout_passed_in != 0)
4944                 nfsv4_unlock(&lyp->nfsly_lock, 1);
4945         NFSUNLOCKCLSTATE();
4946         if (tlyp != NULL)
4947                 free(tlyp, M_NFSLAYOUT);
4948         return (0);
4949 }
4950
4951 /*
4952  * Search for a layout by MDS file handle.
4953  * If one is found, it is returned with a refcnt (shared lock) iff
4954  * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is
4955  * returned NULL.
4956  */
4957 struct nfscllayout *
4958 nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen,
4959     uint64_t off, struct nfsclflayout **retflpp, int *recalledp)
4960 {
4961         struct nfscllayout *lyp;
4962         mount_t mp;
4963         int error, igotlock;
4964
4965         mp = clp->nfsc_nmp->nm_mountp;
4966         *recalledp = 0;
4967         *retflpp = NULL;
4968         NFSLOCKCLSTATE();
4969         lyp = nfscl_findlayout(clp, fhp, fhlen);
4970         if (lyp != NULL) {
4971                 if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
4972                         TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
4973                         TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
4974                         lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
4975                         error = nfscl_findlayoutforio(lyp, off,
4976                             NFSV4OPEN_ACCESSREAD, retflpp);
4977                         if (error == 0)
4978                                 nfsv4_getref(&lyp->nfsly_lock, NULL,
4979                                     NFSCLSTATEMUTEXPTR, mp);
4980                         else {
4981                                 do {
4982                                         igotlock = nfsv4_lock(&lyp->nfsly_lock,
4983                                             1, NULL, NFSCLSTATEMUTEXPTR, mp);
4984                                 } while (igotlock == 0 && !NFSCL_FORCEDISM(mp));
4985                                 *retflpp = NULL;
4986                         }
4987                         if (NFSCL_FORCEDISM(mp)) {
4988                                 lyp = NULL;
4989                                 *recalledp = 1;
4990                         }
4991                 } else {
4992                         lyp = NULL;
4993                         *recalledp = 1;
4994                 }
4995         }
4996         NFSUNLOCKCLSTATE();
4997         return (lyp);
4998 }
4999
5000 /*
5001  * Search for a layout by MDS file handle. If one is found, mark in to be
5002  * recalled, if it already marked "return on close".
5003  */
5004 static void
5005 nfscl_retoncloselayout(vnode_t vp, struct nfsclclient *clp, uint8_t *fhp,
5006     int fhlen, struct nfsclrecalllayout **recallpp)
5007 {
5008         struct nfscllayout *lyp;
5009         uint32_t iomode;
5010
5011         if (vp->v_type != VREG || !NFSHASPNFS(VFSTONFS(vp->v_mount)) ||
5012             nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5013             (VTONFS(vp)->n_flag & NNOLAYOUT) != 0)
5014                 return;
5015         lyp = nfscl_findlayout(clp, fhp, fhlen);
5016         if (lyp != NULL && (lyp->nfsly_flags & (NFSLY_RETONCLOSE |
5017             NFSLY_RECALL)) == NFSLY_RETONCLOSE) {
5018                 iomode = 0;
5019                 if (!LIST_EMPTY(&lyp->nfsly_flayread))
5020                         iomode |= NFSLAYOUTIOMODE_READ;
5021                 if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5022                         iomode |= NFSLAYOUTIOMODE_RW;
5023                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5024                     0, UINT64_MAX, lyp->nfsly_stateid.seqid, 0, 0, NULL,
5025                     *recallpp);
5026                 NFSCL_DEBUG(4, "retoncls recall iomode=%d\n", iomode);
5027                 *recallpp = NULL;
5028         }
5029 }
5030
5031 /*
5032  * Mark the layout to be recalled and with an error.
5033  * Also, disable the dsp from further use.
5034  */
5035 void
5036 nfscl_dserr(uint32_t op, uint32_t stat, struct nfscldevinfo *dp,
5037     struct nfscllayout *lyp, struct nfsclds *dsp)
5038 {
5039         struct nfsclrecalllayout *recallp;
5040         uint32_t iomode;
5041
5042         printf("DS being disabled, error=%d\n", stat);
5043         /* Set up the return of the layout. */
5044         recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
5045         iomode = 0;
5046         NFSLOCKCLSTATE();
5047         if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
5048                 if (!LIST_EMPTY(&lyp->nfsly_flayread))
5049                         iomode |= NFSLAYOUTIOMODE_READ;
5050                 if (!LIST_EMPTY(&lyp->nfsly_flayrw))
5051                         iomode |= NFSLAYOUTIOMODE_RW;
5052                 (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode,
5053                     0, UINT64_MAX, lyp->nfsly_stateid.seqid, stat, op,
5054                     dp->nfsdi_deviceid, recallp);
5055                 NFSUNLOCKCLSTATE();
5056                 NFSCL_DEBUG(4, "nfscl_dserr recall iomode=%d\n", iomode);
5057         } else {
5058                 NFSUNLOCKCLSTATE();
5059                 free(recallp, M_NFSLAYRECALL);
5060         }
5061
5062         /* And shut the TCP connection down. */
5063         nfscl_cancelreqs(dsp);
5064 }
5065
5066 /*
5067  * Cancel all RPCs for this "dsp" by closing the connection.
5068  * Also, mark the session as defunct.
5069  * If NFSCLDS_SAMECONN is set, the connection is shared with other DSs and
5070  * cannot be shut down.
5071  */
5072 void
5073 nfscl_cancelreqs(struct nfsclds *dsp)
5074 {
5075         struct __rpc_client *cl;
5076         static int non_event;
5077
5078         NFSLOCKDS(dsp);
5079         if ((dsp->nfsclds_flags & (NFSCLDS_CLOSED | NFSCLDS_SAMECONN)) == 0 &&
5080             dsp->nfsclds_sockp != NULL &&
5081             dsp->nfsclds_sockp->nr_client != NULL) {
5082                 dsp->nfsclds_flags |= NFSCLDS_CLOSED;
5083                 cl = dsp->nfsclds_sockp->nr_client;
5084                 dsp->nfsclds_sess.nfsess_defunct = 1;
5085                 NFSUNLOCKDS(dsp);
5086                 CLNT_CLOSE(cl);
5087                 /*
5088                  * This 1sec sleep is done to reduce the number of reconnect
5089                  * attempts made on the DS while it has failed.
5090                  */
5091                 tsleep(&non_event, PVFS, "ndscls", hz);
5092                 return;
5093         }
5094         NFSUNLOCKDS(dsp);
5095 }
5096
5097 /*
5098  * Dereference a layout.
5099  */
5100 void
5101 nfscl_rellayout(struct nfscllayout *lyp, int exclocked)
5102 {
5103
5104         NFSLOCKCLSTATE();
5105         if (exclocked != 0)
5106                 nfsv4_unlock(&lyp->nfsly_lock, 0);
5107         else
5108                 nfsv4_relref(&lyp->nfsly_lock);
5109         NFSUNLOCKCLSTATE();
5110 }
5111
5112 /*
5113  * Search for a devinfo by deviceid. If one is found, return it after
5114  * acquiring a reference count on it.
5115  */
5116 struct nfscldevinfo *
5117 nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid,
5118     struct nfscldevinfo *dip)
5119 {
5120
5121         NFSLOCKCLSTATE();
5122         if (dip == NULL)
5123                 dip = nfscl_finddevinfo(clp, deviceid);
5124         if (dip != NULL)
5125                 dip->nfsdi_refcnt++;
5126         NFSUNLOCKCLSTATE();
5127         return (dip);
5128 }
5129
5130 /*
5131  * Dereference a devinfo structure.
5132  */
5133 static void
5134 nfscl_reldevinfo_locked(struct nfscldevinfo *dip)
5135 {
5136
5137         dip->nfsdi_refcnt--;
5138         if (dip->nfsdi_refcnt == 0)
5139                 wakeup(&dip->nfsdi_refcnt);
5140 }
5141
/*
 * Drop a reference count on a devinfo structure.
 * This variant acquires the client state mutex around the release, so it
 * is for callers that do not already hold it.
 */
void
nfscl_reldevinfo(struct nfscldevinfo *dip)
{

	NFSLOCKCLSTATE();
	nfscl_reldevinfo_locked(dip);
	NFSUNLOCKCLSTATE();
}
5153
5154 /*
5155  * Find a layout for this file handle. Return NULL upon failure.
5156  */
5157 static struct nfscllayout *
5158 nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
5159 {
5160         struct nfscllayout *lyp;
5161
5162         LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash)
5163                 if (lyp->nfsly_fhlen == fhlen &&
5164                     !NFSBCMP(lyp->nfsly_fh, fhp, fhlen))
5165                         break;
5166         return (lyp);
5167 }
5168
5169 /*
5170  * Find a devinfo for this deviceid. Return NULL upon failure.
5171  */
5172 static struct nfscldevinfo *
5173 nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid)
5174 {
5175         struct nfscldevinfo *dip;
5176
5177         LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list)
5178                 if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID)
5179                     == 0)
5180                         break;
5181         return (dip);
5182 }
5183
5184 /*
5185  * Merge the new file layout list into the main one, maintaining it in
5186  * increasing offset order.
5187  */
5188 static void
5189 nfscl_mergeflayouts(struct nfsclflayouthead *fhlp,
5190     struct nfsclflayouthead *newfhlp)
5191 {
5192         struct nfsclflayout *flp, *nflp, *prevflp, *tflp;
5193
5194         flp = LIST_FIRST(fhlp);
5195         prevflp = NULL;
5196         LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) {
5197                 while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) {
5198                         prevflp = flp;
5199                         flp = LIST_NEXT(flp, nfsfl_list);
5200                 }
5201                 if (prevflp == NULL)
5202                         LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list);
5203                 else
5204                         LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list);
5205                 prevflp = nflp;
5206         }
5207 }
5208
5209 /*
5210  * Add this nfscldevinfo to the client, if it doesn't already exist.
5211  * This function consumes the structure pointed at by dip, if not NULL.
5212  */
5213 int
5214 nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip, int ind,
5215     struct nfsclflayout *flp)
5216 {
5217         struct nfsclclient *clp;
5218         struct nfscldevinfo *tdip;
5219         uint8_t *dev;
5220
5221         NFSLOCKCLSTATE();
5222         clp = nmp->nm_clp;
5223         if (clp == NULL) {
5224                 NFSUNLOCKCLSTATE();
5225                 if (dip != NULL)
5226                         free(dip, M_NFSDEVINFO);
5227                 return (ENODEV);
5228         }
5229         if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5230                 dev = flp->nfsfl_dev;
5231         else
5232                 dev = flp->nfsfl_ffm[ind].dev;
5233         tdip = nfscl_finddevinfo(clp, dev);
5234         if (tdip != NULL) {
5235                 tdip->nfsdi_layoutrefs++;
5236                 if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5237                         flp->nfsfl_devp = tdip;
5238                 else
5239                         flp->nfsfl_ffm[ind].devp = tdip;
5240                 nfscl_reldevinfo_locked(tdip);
5241                 NFSUNLOCKCLSTATE();
5242                 if (dip != NULL)
5243                         free(dip, M_NFSDEVINFO);
5244                 return (0);
5245         }
5246         if (dip != NULL) {
5247                 LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list);
5248                 dip->nfsdi_layoutrefs = 1;
5249                 if ((flp->nfsfl_flags & NFSFL_FILE) != 0)
5250                         flp->nfsfl_devp = dip;
5251                 else
5252                         flp->nfsfl_ffm[ind].devp = dip;
5253         }
5254         NFSUNLOCKCLSTATE();
5255         if (dip == NULL)
5256                 return (ENODEV);
5257         return (0);
5258 }
5259
5260 /*
5261  * Free up a layout structure and associated file layout structure(s).
5262  */
5263 void
5264 nfscl_freelayout(struct nfscllayout *layp)
5265 {
5266         struct nfsclflayout *flp, *nflp;
5267         struct nfsclrecalllayout *rp, *nrp;
5268
5269         LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) {
5270                 LIST_REMOVE(flp, nfsfl_list);
5271                 nfscl_freeflayout(flp);
5272         }
5273         LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) {
5274                 LIST_REMOVE(flp, nfsfl_list);
5275                 nfscl_freeflayout(flp);
5276         }
5277         LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) {
5278                 LIST_REMOVE(rp, nfsrecly_list);
5279                 free(rp, M_NFSLAYRECALL);
5280         }
5281         nfscl_layoutcnt--;
5282         free(layp, M_NFSLAYOUT);
5283 }
5284
5285 /*
5286  * Free up a file layout structure.
5287  */
5288 void
5289 nfscl_freeflayout(struct nfsclflayout *flp)
5290 {
5291         int i, j;
5292
5293         if ((flp->nfsfl_flags & NFSFL_FILE) != 0) {
5294                 for (i = 0; i < flp->nfsfl_fhcnt; i++)
5295                         free(flp->nfsfl_fh[i], M_NFSFH);
5296                 if (flp->nfsfl_devp != NULL)
5297                         flp->nfsfl_devp->nfsdi_layoutrefs--;
5298         }
5299         if ((flp->nfsfl_flags & NFSFL_FLEXFILE) != 0)
5300                 for (i = 0; i < flp->nfsfl_mirrorcnt; i++) {
5301                         for (j = 0; j < flp->nfsfl_ffm[i].fhcnt; j++)
5302                                 free(flp->nfsfl_ffm[i].fh[j], M_NFSFH);
5303                         if (flp->nfsfl_ffm[i].devp != NULL)     
5304                                 flp->nfsfl_ffm[i].devp->nfsdi_layoutrefs--;     
5305                 }
5306         free(flp, M_NFSFLAYOUT);
5307 }
5308
5309 /*
5310  * Free up a file layout devinfo structure.
5311  */
5312 void
5313 nfscl_freedevinfo(struct nfscldevinfo *dip)
5314 {
5315
5316         free(dip, M_NFSDEVINFO);
5317 }
5318
5319 /*
5320  * Mark any layouts that match as recalled.
5321  */
5322 static int
5323 nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode,
5324     uint64_t off, uint64_t len, uint32_t stateseqid, uint32_t stat, uint32_t op,
5325     char *devid, struct nfsclrecalllayout *recallp)
5326 {
5327         struct nfsclrecalllayout *rp, *orp;
5328
5329         recallp->nfsrecly_recalltype = recalltype;
5330         recallp->nfsrecly_iomode = iomode;
5331         recallp->nfsrecly_stateseqid = stateseqid;
5332         recallp->nfsrecly_off = off;
5333         recallp->nfsrecly_len = len;
5334         recallp->nfsrecly_stat = stat;
5335         recallp->nfsrecly_op = op;
5336         if (devid != NULL)
5337                 NFSBCOPY(devid, recallp->nfsrecly_devid, NFSX_V4DEVICEID);
5338         /*
5339          * Order the list as file returns first, followed by fsid and any
5340          * returns, both in increasing stateseqid order.
5341          * Note that the seqids wrap around, so 1 is after 0xffffffff.
5342          * (I'm not sure this is correct because I find RFC5661 confusing
5343          *  on this, but hopefully it will work ok.)
5344          */
5345         orp = NULL;
5346         LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5347                 orp = rp;
5348                 if ((recalltype == NFSLAYOUTRETURN_FILE &&
5349                      (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE ||
5350                       nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) ||
5351                     (recalltype != NFSLAYOUTRETURN_FILE &&
5352                      rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE &&
5353                      nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) {
5354                         LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list);
5355                         break;
5356                 }
5357
5358                 /*
5359                  * Put any error return on all the file returns that will
5360                  * preceed this one.
5361                  */
5362                 if (rp->nfsrecly_recalltype == NFSLAYOUTRETURN_FILE &&
5363                    stat != 0 && rp->nfsrecly_stat == 0) {
5364                         rp->nfsrecly_stat = stat;
5365                         rp->nfsrecly_op = op;
5366                         if (devid != NULL)
5367                                 NFSBCOPY(devid, rp->nfsrecly_devid,
5368                                     NFSX_V4DEVICEID);
5369                 }
5370         }
5371         if (rp == NULL) {
5372                 if (orp == NULL)
5373                         LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp,
5374                             nfsrecly_list);
5375                 else
5376                         LIST_INSERT_AFTER(orp, recallp, nfsrecly_list);
5377         }
5378         lyp->nfsly_flags |= NFSLY_RECALL;
5379         wakeup(lyp->nfsly_clp);
5380         return (0);
5381 }
5382
/*
 * Compare two seqids for ordering, allowing for the seqids wrapping around
 * from 0xffffffff->0.
 * When the two values differ by 0x7fffffff or more, the numerically smaller
 * one is assumed to have wrapped around and is therefore the later one.
 * Return 1 if seqid1 comes before seqid2 or is equal to it, 0 otherwise.
 */
static int
nfscl_seq(uint32_t seqid1, uint32_t seqid2)
{

	if (seqid1 == seqid2)
		return (1);
	if (seqid1 < seqid2)
		/* In order, unless the gap says seqid1 has wrapped around. */
		return ((seqid2 - seqid1) < 0x7fffffff ? 1 : 0);
	/* Out of order, unless the gap says seqid2 has wrapped around. */
	return ((seqid1 - seqid2) >= 0x7fffffff ? 1 : 0);
}
5403
5404 /*
5405  * Do a layout return for each of the recalls.
5406  */
5407 static void
5408 nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp,
5409     struct ucred *cred, NFSPROC_T *p)
5410 {
5411         struct nfsclrecalllayout *rp;
5412         nfsv4stateid_t stateid;
5413         int layouttype;
5414
5415         NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER);
5416         stateid.seqid = lyp->nfsly_stateid.seqid;
5417         if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5418                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5419         else
5420                 layouttype = NFSLAYOUT_FLEXFILE;
5421         LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
5422                 (void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh,
5423                     lyp->nfsly_fhlen, 0, layouttype,
5424                     rp->nfsrecly_iomode, rp->nfsrecly_recalltype,
5425                     rp->nfsrecly_off, rp->nfsrecly_len,
5426                     &stateid, cred, p, rp->nfsrecly_stat, rp->nfsrecly_op,
5427                     rp->nfsrecly_devid);
5428         }
5429 }
5430
5431 /*
5432  * Do the layout commit for a file layout.
5433  */
5434 static void
5435 nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp,
5436     struct ucred *cred, NFSPROC_T *p)
5437 {
5438         struct nfsclflayout *flp;
5439         uint64_t len;
5440         int error, layouttype;
5441
5442         if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
5443                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5444         else
5445                 layouttype = NFSLAYOUT_FLEXFILE;
5446         LIST_FOREACH(flp, &lyp->nfsly_flayrw, nfsfl_list) {
5447                 if (layouttype == NFSLAYOUT_FLEXFILE &&
5448                     (flp->nfsfl_fflags & NFSFLEXFLAG_NO_LAYOUTCOMMIT) != 0) {
5449                         NFSCL_DEBUG(4, "Flex file: no layoutcommit\n");
5450                         /* If not supported, don't bother doing it. */
5451                         NFSLOCKMNT(nmp);
5452                         nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5453                         NFSUNLOCKMNT(nmp);
5454                         break;
5455                 } else if (flp->nfsfl_off <= lyp->nfsly_lastbyte) {
5456                         len = flp->nfsfl_end - flp->nfsfl_off;
5457                         error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh,
5458                             lyp->nfsly_fhlen, 0, flp->nfsfl_off, len,
5459                             lyp->nfsly_lastbyte, &lyp->nfsly_stateid,
5460                             layouttype, cred, p, NULL);
5461                         NFSCL_DEBUG(4, "layoutcommit err=%d\n", error);
5462                         if (error == NFSERR_NOTSUPP) {
5463                                 /* If not supported, don't bother doing it. */
5464                                 NFSLOCKMNT(nmp);
5465                                 nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
5466                                 NFSUNLOCKMNT(nmp);
5467                                 break;
5468                         }
5469                 }
5470         }
5471 }
5472
5473 /*
5474  * Commit all layouts for a file (vnode).
5475  */
5476 int
5477 nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p)
5478 {
5479         struct nfsclclient *clp;
5480         struct nfscllayout *lyp;
5481         struct nfsnode *np = VTONFS(vp);
5482         mount_t mp;
5483         struct nfsmount *nmp;
5484
5485         mp = vp->v_mount;
5486         nmp = VFSTONFS(mp);
5487         if (NFSHASNOLAYOUTCOMMIT(nmp))
5488                 return (0);
5489         NFSLOCKCLSTATE();
5490         clp = nmp->nm_clp;
5491         if (clp == NULL) {
5492                 NFSUNLOCKCLSTATE();
5493                 return (EPERM);
5494         }
5495         lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
5496         if (lyp == NULL) {
5497                 NFSUNLOCKCLSTATE();
5498                 return (EPERM);
5499         }
5500         nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
5501         if (NFSCL_FORCEDISM(mp)) {
5502                 NFSUNLOCKCLSTATE();
5503                 return (EPERM);
5504         }
5505 tryagain:
5506         if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
5507                 lyp->nfsly_flags &= ~NFSLY_WRITTEN;
5508                 NFSUNLOCKCLSTATE();
5509                 NFSCL_DEBUG(4, "do layoutcommit2\n");
5510                 nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p);
5511                 NFSLOCKCLSTATE();
5512                 goto tryagain;
5513         }
5514         nfsv4_relref(&lyp->nfsly_lock);
5515         NFSUNLOCKCLSTATE();
5516         return (0);
5517 }