]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/nfsclient/nfs_clrpcops.c
Fix the default number of threads for Flex File layout pNFS client I/O.
[FreeBSD/FreeBSD.git] / sys / fs / nfsclient / nfs_clrpcops.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 /*
40  * Rpc op calls, generally called from the vnode op calls or through the
41  * buffer cache, for NFS v2, 3 and 4.
42  * These do not normally make any changes to vnode arguments or use
43  * structures that might change between the VFS variants. The returned
44  * arguments are all at the end, after the NFSPROC_T *p one.
45  */
46
47 #ifndef APPLEKEXT
48 #include "opt_inet6.h"
49
50 #include <fs/nfs/nfsport.h>
51 #include <sys/sysctl.h>
52 #include <sys/taskqueue.h>
53
54 SYSCTL_DECL(_vfs_nfs);
55
/*
 * Backing variable for the read/write "ignore_eexist" sysctl that is
 * registered under the _vfs_nfs node just below; it starts out as 0.
 * The code that consults it is not in this part of the file.
 */
56 static int      nfsignore_eexist = 0;
57 SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
58     &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
59
60 /*
 * Global variables.
 * The "extern" ones are defined in other files; the remainder are defined
 * here with the initial values shown.
 */
/* nfsrpc_open() does not attempt the layout-returning Open while this is 0. */
63 extern int nfs_numnfscbd;
64 extern struct timeval nfsboottime;
65 extern u_int32_t newnfs_false, newnfs_true;
66 extern nfstype nfsv34_type[9];
67 extern int nfsrv_useacl;
68 extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
69 extern int nfscl_debuglevel;
70 extern int nfs_pnfsiothreads;
71 NFSCLSTATEMUTEX;
/* NOTE(review): looks like a testing knob; its users are not visible here. */
72 int nfstest_outofseq = 0;
/*
 * When non-zero, nfsrpc_openrpc() sets nfso_posixlock on an open even if
 * the server's reply did not carry NFSV4OPEN_LOCKTYPEPOSIX.
 */
73 int nfscl_assumeposixlocks = 1;
/* nfsrpc_open() does not attempt the layout-returning Open while this is 0. */
74 int nfscl_enablecallb = 0;
75 short nfsv4_cbport = NFSV4_CBPORT;
/* NOTE(review): looks like a testing knob; its users are not visible here. */
76 int nfstest_openallsetattr = 0;
77 #endif  /* !APPLEKEXT */
78
79 #define DIRHDSIZ        offsetof(struct dirent, d_name)
80
/*
 * Result codes handed back by nfscl_getsameserver().
 * The enumerators rely on the implicit numbering 0, 1, 2.
 */
enum nfsclds_state {
        /* Use this session for the DS. */
        NFSDSP_USETHISSESSION,

        /* Use the nfsclds_sequence field of this dsp for new session. */
        NFSDSP_SEQTHISSESSION,

        /* No matching server was found. */
        NFSDSP_NOTFOUND
};
93
94 /*
 * Do a write RPC on a DS data file, using this structure for the arguments,
 * so that this function can be executed by a separate kernel process.
 *
 * NOTE(review): the members from vp through p line up, type for type, with
 * the parameter list of nfsrpc_writedsmir() (its two "int *" parameters
 * being held by value here).  The code that fills in and consumes this
 * structure is not in this part of the file, so the per-member notes
 * below are assumptions to be confirmed against that code.
 */
98 struct nfsclwritedsdorpc {
99         int                     done;           /* presumably set once the RPC has finished -- TODO confirm */
100         int                     inprog;         /* presumably set while the RPC is queued/running -- TODO confirm */
101         struct task             tsk;            /* taskqueue task; presumably what runs the RPC -- TODO confirm */
102         struct vnode            *vp;
103         int                     iomode;
104         int                     must_commit;
105         nfsv4stateid_t          *stateidp;
106         struct nfsclds          *dsp;
107         uint64_t                off;
108         int                     len;
109         struct nfsfh            *fhp;
110         struct mbuf             *m;
111         int                     vers;
112         int                     minorvers;
113         struct ucred            *cred;
114         NFSPROC_T               *p;
115         int                     err;            /* presumably the RPC's result -- TODO confirm */
116 };
117
118 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
119     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
120 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
121     nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *);
122 static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
123     struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
124     void *);
125 static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
126     nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
127     struct nfsvattr *, struct nfsfh **, int *, int *, void *);
128 static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
129     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
130     NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
131     int *, void *, int *);
132 static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
133     struct nfscllockowner *, u_int64_t, u_int64_t,
134     u_int32_t, struct ucred *, NFSPROC_T *, int);
135 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
136     struct acl *, nfsv4stateid_t *, void *);
137 static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
138     uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
139     struct ucred *, NFSPROC_T *);
140 static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
141     struct sockaddr_in6 *, sa_family_t, int, struct nfsclds **, NFSPROC_T *);
142 static void nfscl_initsessionslots(struct nfsclsession *);
143 static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
144     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
145     struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
146     NFSPROC_T *);
147 static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
148     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
149     struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
150     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
151 static struct mbuf *nfsm_copym(struct mbuf *, int, int);
152 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
153     struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
154     struct ucred *, NFSPROC_T *);
155 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
156     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
157     struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
158 static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
159     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
160     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
161 static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
162     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
163     struct ucred *, NFSPROC_T *);
164 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
165     struct nfsclds *, struct nfsclds **);
166 static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
167     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
168     NFSPROC_T *);
169 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
170     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
171 static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
172     uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
173 static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
174     NFSPROC_T *);
175 static int nfsrv_parselayoutget(struct nfsrv_descript *, nfsv4stateid_t *,
176     int *, struct nfsclflayouthead *);
177 static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
178     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
179     struct nfscldeleg **, struct ucred *, NFSPROC_T *);
180 static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
181     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
182     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
183     struct nfsfh **, int *, int *, void *, int *);
184 static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
185     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
186     struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
187     struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
188 static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
189     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
190     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
191     struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *,
192     int, int, int, int *, struct nfsclflayouthead *, int *);
193 static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
194     uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
195     struct nfsclflayouthead *, struct ucred *, NFSPROC_T *, void *);
196 static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
197     int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
198     struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
199
200 int nfs_pnfsio(task_fn_t *, void *);
201
202 /*
203  * nfs null call from vfs.
204  */
205 APPLESTATIC int
206 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
207 {
208         int error;
209         struct nfsrv_descript nfsd, *nd = &nfsd;
210         
211         NFSCL_REQSTART(nd, NFSPROC_NULL, vp);
212         error = nfscl_request(nd, vp, p, cred, NULL);
213         if (nd->nd_repstat && !error)
214                 error = nd->nd_repstat;
215         mbuf_freem(nd->nd_mrep);
216         return (error);
217 }
218
219 /*
220  * nfs access rpc op.
221  * For nfs version 3 and 4, use the access rpc to check accessibility. If file
222  * modes are changed on the server, accesses might still fail later.
223  */
224 APPLESTATIC int
225 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
226     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
227 {
228         int error;
229         u_int32_t mode, rmode;
230
231         if (acmode & VREAD)
232                 mode = NFSACCESS_READ;
233         else
234                 mode = 0;
235         if (vnode_vtype(vp) == VDIR) {
236                 if (acmode & VWRITE)
237                         mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
238                                  NFSACCESS_DELETE);
239                 if (acmode & VEXEC)
240                         mode |= NFSACCESS_LOOKUP;
241         } else {
242                 if (acmode & VWRITE)
243                         mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
244                 if (acmode & VEXEC)
245                         mode |= NFSACCESS_EXECUTE;
246         }
247
248         /*
249          * Now, just call nfsrpc_accessrpc() to do the actual RPC.
250          */
251         error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode,
252             NULL);
253
254         /*
255          * The NFS V3 spec does not clarify whether or not
256          * the returned access bits can be a superset of
257          * the ones requested, so...
258          */
259         if (!error && (rmode & mode) != mode)
260                 error = EACCES;
261         return (error);
262 }
263
264 /*
265  * The actual rpc, separated out for Darwin.
266  */
267 APPLESTATIC int
268 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
269     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep,
270     void *stuff)
271 {
272         u_int32_t *tl;
273         u_int32_t supported, rmode;
274         int error;
275         struct nfsrv_descript nfsd, *nd = &nfsd;
276         nfsattrbit_t attrbits;
277
278         *attrflagp = 0;
279         supported = mode;
280         NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp);
281         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
282         *tl = txdr_unsigned(mode);
283         if (nd->nd_flag & ND_NFSV4) {
284                 /*
285                  * And do a Getattr op.
286                  */
287                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
288                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
289                 NFSGETATTR_ATTRBIT(&attrbits);
290                 (void) nfsrv_putattrbit(nd, &attrbits);
291         }
292         error = nfscl_request(nd, vp, p, cred, stuff);
293         if (error)
294                 return (error);
295         if (nd->nd_flag & ND_NFSV3) {
296                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
297                 if (error)
298                         goto nfsmout;
299         }
300         if (!nd->nd_repstat) {
301                 if (nd->nd_flag & ND_NFSV4) {
302                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
303                         supported = fxdr_unsigned(u_int32_t, *tl++);
304                 } else {
305                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
306                 }
307                 rmode = fxdr_unsigned(u_int32_t, *tl);
308                 if (nd->nd_flag & ND_NFSV4)
309                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
310
311                 /*
312                  * It's not obvious what should be done about
313                  * unsupported access modes. For now, be paranoid
314                  * and clear the unsupported ones.
315                  */
316                 rmode &= supported;
317                 *rmodep = rmode;
318         } else
319                 error = nd->nd_repstat;
320 nfsmout:
321         mbuf_freem(nd->nd_mrep);
322         return (error);
323 }
324
325 /*
326  * nfs open rpc
327  */
328 APPLESTATIC int
329 nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
330 {
331         struct nfsclopen *op;
332         struct nfscldeleg *dp;
333         struct nfsfh *nfhp;
334         struct nfsnode *np = VTONFS(vp);
335         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
336         u_int32_t mode, clidrev;
337         int ret, newone, error, expireret = 0, retrycnt;
338
339         /*
340          * For NFSv4, Open Ops are only done on Regular Files.
341          */
342         if (vnode_vtype(vp) != VREG)
343                 return (0);
344         mode = 0;
345         if (amode & FREAD)
346                 mode |= NFSV4OPEN_ACCESSREAD;
347         if (amode & FWRITE)
348                 mode |= NFSV4OPEN_ACCESSWRITE;
349         nfhp = np->n_fhp;
350
351         retrycnt = 0;
352 #ifdef notdef
353 { char name[100]; int namel;
354 namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
355 bcopy(NFS4NODENAME(np->n_v4), name, namel);
356 name[namel] = '\0';
357 printf("rpcopen p=0x%x name=%s",p->p_pid,name);
358 if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
359 else printf(" fhl=0\n");
360 }
361 #endif
362         do {
363             dp = NULL;
364             error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
365                 cred, p, NULL, &op, &newone, &ret, 1);
366             if (error) {
367                 return (error);
368             }
369             if (nmp->nm_clp != NULL)
370                 clidrev = nmp->nm_clp->nfsc_clientidrev;
371             else
372                 clidrev = 0;
373             if (ret == NFSCLOPEN_DOOPEN) {
374                 if (np->n_v4 != NULL) {
375                         /*
376                          * For the first attempt, try and get a layout, if
377                          * pNFS is enabled for the mount.
378                          */
379                         if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
380                             nfs_numnfscbd == 0 ||
381                             (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
382                                 error = nfsrpc_openrpc(nmp, vp,
383                                     np->n_v4->n4_data,
384                                     np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
385                                     np->n_fhp->nfh_len, mode, op,
386                                     NFS4NODENAME(np->n_v4),
387                                     np->n_v4->n4_namelen,
388                                     &dp, 0, 0x0, cred, p, 0, 0);
389                         else
390                                 error = nfsrpc_getopenlayout(nmp, vp,
391                                     np->n_v4->n4_data,
392                                     np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
393                                     np->n_fhp->nfh_len, mode, op,
394                                     NFS4NODENAME(np->n_v4),
395                                     np->n_v4->n4_namelen, &dp, cred, p);
396                         if (dp != NULL) {
397 #ifdef APPLE
398                                 OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag);
399 #else
400                                 NFSLOCKNODE(np);
401                                 np->n_flag &= ~NDELEGMOD;
402                                 /*
403                                  * Invalidate the attribute cache, so that
404                                  * attributes that pre-date the issue of a
405                                  * delegation are not cached, since the
406                                  * cached attributes will remain valid while
407                                  * the delegation is held.
408                                  */
409                                 NFSINVALATTRCACHE(np);
410                                 NFSUNLOCKNODE(np);
411 #endif
412                                 (void) nfscl_deleg(nmp->nm_mountp,
413                                     op->nfso_own->nfsow_clp,
414                                     nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
415                         }
416                 } else {
417                         error = EIO;
418                 }
419                 newnfs_copyincred(cred, &op->nfso_cred);
420             } else if (ret == NFSCLOPEN_SETCRED)
421                 /*
422                  * This is a new local open on a delegation. It needs
423                  * to have credentials so that an open can be done
424                  * against the server during recovery.
425                  */
426                 newnfs_copyincred(cred, &op->nfso_cred);
427
428             /*
429              * nfso_opencnt is the count of how many VOP_OPEN()s have
430              * been done on this Open successfully and a VOP_CLOSE()
431              * is expected for each of these.
432              * If error is non-zero, don't increment it, since the Open
433              * hasn't succeeded yet.
434              */
435             if (!error)
436                 op->nfso_opencnt++;
437             nfscl_openrelease(nmp, op, error, newone);
438             if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
439                 error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
440                 error == NFSERR_BADSESSION) {
441                 (void) nfs_catnap(PZERO, error, "nfs_open");
442             } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
443                 && clidrev != 0) {
444                 expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
445                 retrycnt++;
446             }
447         } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
448             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
449             error == NFSERR_BADSESSION ||
450             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
451              expireret == 0 && clidrev != 0 && retrycnt < 4));
452         if (error && retrycnt >= 4)
453                 error = EIO;
454         return (error);
455 }
456
457 /*
 * the actual open rpc
 *
 * Builds and sends an NFSPROC_OPEN request that never creates the file
 * (NFSV4OPEN_NOCREATE), followed by a Getattr for the change and
 * modify-time attributes, and then parses the reply.
 *
 * nmp              - mount the request is sent on
 * vp               - handed to newnfs_request() and nfsrpc_openconfirm()
 * nfhp, fhlen      - file handle given to nfscl_reqstart() for the request
 * newfhp, newfhlen - file handle copied into a newly allocated delegation
 *                    and handed to nfsrpc_openconfirm()
 * mode             - access bits (masked with NFSV4OPEN_ACCESSBOTH) and,
 *                    shifted up by NFSLCK_SHIFT, deny bits (masked with
 *                    NFSV4OPEN_DENYBOTH)
 * op               - the open: supplies the open owner and its seqid,
 *                    receives the stateid and nfso_posixlock
 * name, namelen    - name appended to the request unless reclaim is set
 * dpp              - in: optional delegation to claim the open under;
 *                    out: always reset to NULL first, then set to the
 *                    delegation issued by the server (or NULL) when 0 is
 *                    returned
 * reclaim          - non-zero selects NFSV4OPEN_CLAIMPREVIOUS, with
 *                    delegtype sent as the delegation type
 * syscred          - non-zero sets ND_USEGSSNAME on the request
 * recursed         - non-zero suppresses the extra Open that is otherwise
 *                    tried after an OpenConfirm to pick up a delegation
 *
 * Returns 0, an error from the request/reply parsing, or the reply status.
 * A delegation allocated here is freed again on any error return that
 * goes through nfsmout.
 */
460 APPLESTATIC int
461 nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
462     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
463     u_int8_t *name, int namelen, struct nfscldeleg **dpp,
464     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
465     int syscred, int recursed)
466 {
467         u_int32_t *tl;
468         struct nfsrv_descript nfsd, *nd = &nfsd;
469         struct nfscldeleg *dp, *ndp = NULL;
470         struct nfsvattr nfsva;
471         u_int32_t rflags, deleg;
472         nfsattrbit_t attrbits;
473         int error, ret, acesize, limitby;
474         struct nfsclsession *tsep;
475
            /* Remember the caller's delegation, then clear the out value. */
476         dp = *dpp;
477         *dpp = NULL;
478         nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
            /* seqid, share access, share deny and the two clientid words. */
479         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
480         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
481         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
482         *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
483         tsep = nfsmnt_mdssession(nmp);
484         *tl++ = tsep->nfsess_clientid.lval[0];
485         *tl = tsep->nfsess_clientid.lval[1];
486         (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
            /* Open type (no create) followed by the claim type. */
487         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
488         *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
489         if (reclaim) {
490                 *tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
491                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
492                 *tl = txdr_unsigned(delegtype);
493         } else {
494                 if (dp != NULL) {
                            /*
                             * Claim under the caller's delegation; the
                             * stateid's seqid is sent as 0 when
                             * NFSHASNFSV4N() is true for the mount.
                             */
495                         *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
496                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
497                         if (NFSHASNFSV4N(nmp))
498                                 *tl++ = 0;
499                         else
500                                 *tl++ = dp->nfsdl_stateid.seqid;
501                         *tl++ = dp->nfsdl_stateid.other[0];
502                         *tl++ = dp->nfsdl_stateid.other[1];
503                         *tl = dp->nfsdl_stateid.other[2];
504                 } else {
505                         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
506                 }
507                 (void) nfsm_strtom(nd, name, namelen);
508         }
            /* Append a Getattr for the change and modify-time attributes. */
509         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
510         *tl = txdr_unsigned(NFSV4OP_GETATTR);
511         NFSZERO_ATTRBIT(&attrbits);
512         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
513         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
514         (void) nfsrv_putattrbit(nd, &attrbits);
515         if (syscred)
516                 nd->nd_flag |= ND_USEGSSNAME;
517         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
518             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
            /* Returns directly here, without freeing a reply. */
519         if (error)
520                 return (error);
521         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
522         if (!nd->nd_repstat) {
                    /*
                     * Open stateid first.  tl is left on the stateid's last
                     * word, so *(tl + 6) below is the final word of the
                     * chunk dissected here; it is kept as the result flags.
                     */
523                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
524                     6 * NFSX_UNSIGNED);
525                 op->nfso_stateid.seqid = *tl++;
526                 op->nfso_stateid.other[0] = *tl++;
527                 op->nfso_stateid.other[1] = *tl++;
528                 op->nfso_stateid.other[2] = *tl;
529                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
530                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
531                 if (error)
532                         goto nfsmout;
                    /* Delegation type granted by the server. */
533                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
534                 deleg = fxdr_unsigned(u_int32_t, *tl);
535                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
536                     deleg == NFSV4OPEN_DELEGATEWRITE) {
                            /* Note on the client that a delegation was issued. */
537                         if (!(op->nfso_own->nfsow_clp->nfsc_flags &
538                               NFSCLFLAGS_FIRSTDELEG))
539                                 op->nfso_own->nfsow_clp->nfsc_flags |=
540                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
                            /* The file handle is stored after the structure. */
541                         ndp = malloc(
542                             sizeof (struct nfscldeleg) + newfhlen,
543                             M_NFSCLDELEG, M_WAITOK);
544                         LIST_INIT(&ndp->nfsdl_owner);
545                         LIST_INIT(&ndp->nfsdl_lock);
546                         ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
547                         ndp->nfsdl_fhlen = newfhlen;
548                         NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
549                         newnfs_copyincred(cred, &ndp->nfsdl_cred);
550                         nfscl_lockinit(&ndp->nfsdl_rwlock);
                            /* Delegation stateid, then the recall flag (ret). */
551                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
552                             NFSX_UNSIGNED);
553                         ndp->nfsdl_stateid.seqid = *tl++;
554                         ndp->nfsdl_stateid.other[0] = *tl++;
555                         ndp->nfsdl_stateid.other[1] = *tl++;
556                         ndp->nfsdl_stateid.other[2] = *tl++;
557                         ret = fxdr_unsigned(int, *tl);
558                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
559                                 ndp->nfsdl_flags = NFSCLDL_WRITE;
560                                 /*
561                                  * Indicates how much the file can grow.
562                                  */
563                                 NFSM_DISSECT(tl, u_int32_t *,
564                                     3 * NFSX_UNSIGNED);
565                                 limitby = fxdr_unsigned(int, *tl++);
566                                 switch (limitby) {
567                                 case NFSV4OPEN_LIMITSIZE:
568                                         ndp->nfsdl_sizelimit = fxdr_hyper(tl);
569                                         break;
570                                 case NFSV4OPEN_LIMITBLOCKS:
                                            /* Limit is the product of the two words. */
571                                         ndp->nfsdl_sizelimit =
572                                             fxdr_unsigned(u_int64_t, *tl++);
573                                         ndp->nfsdl_sizelimit *=
574                                             fxdr_unsigned(u_int64_t, *tl);
575                                         break;
576                                 default:
577                                         error = NFSERR_BADXDR;
578                                         goto nfsmout;
579                                 }
580                         } else {
581                                 ndp->nfsdl_flags = NFSCLDL_READ;
582                         }
583                         if (ret)
584                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
585                         error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
586                             &acesize, p);
587                         if (error)
588                                 goto nfsmout;
589                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
590                         error = NFSERR_BADXDR;
591                         goto nfsmout;
592                 }
                    /*
                     * Step over two words and load the attributes.
                     * NOTE(review): presumably the Getattr op number and
                     * its status -- confirm.
                     */
593                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
594                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
595                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
596                     NULL, NULL, NULL, p, cred);
597                 if (error)
598                         goto nfsmout;
599                 if (ndp != NULL) {
600                         ndp->nfsdl_change = nfsva.na_filerev;
601                         ndp->nfsdl_modtime = nfsva.na_mtime;
602                         ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
603                 }
                    /* Do the OpenConfirm if asked to, retrying on NFSERR_DELAY. */
604                 if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
605                     do {
606                         ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
607                             cred, p);
608                         if (ret == NFSERR_DELAY)
609                             (void) nfs_catnap(PZERO, ret, "nfs_open");
610                     } while (ret == NFSERR_DELAY);
611                     error = ret;
612                 }
613                 if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
614                     nfscl_assumeposixlocks)
615                     op->nfso_posixlock = 1;
616                 else
617                     op->nfso_posixlock = 0;
618
619                 /*
620                  * If the server is handing out delegations, but we didn't
621                  * get one because an OpenConfirm was required, try the
622                  * Open again, to get a delegation. This is a harmless no-op,
623                  * from a server's point of view.
624                  */
625                 if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
626                     (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
627                     && !error && dp == NULL && ndp == NULL && !recursed) {
628                     do {
                            /* recursed is passed as 1, so this nests only once. */
629                         ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
630                             newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
631                             cred, p, syscred, 1);
632                         if (ret == NFSERR_DELAY)
633                             (void) nfs_catnap(PZERO, ret, "nfs_open2");
634                     } while (ret == NFSERR_DELAY);
635                     if (ret) {
636                         if (ndp != NULL) {
637                                 free(ndp, M_NFSCLDELEG);
638                                 ndp = NULL;
639                         }
                            /* Only these failures of the extra Open are reported. */
640                         if (ret == NFSERR_STALECLIENTID ||
641                             ret == NFSERR_STALEDONTRECOVER ||
642                             ret == NFSERR_BADSESSION)
643                                 error = ret;
644                     }
645                 }
646         }
647         if (nd->nd_repstat != 0 && error == 0)
648                 error = nd->nd_repstat;
649         if (error == NFSERR_STALECLIENTID)
650                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
651 nfsmout:
            /* Hand the new delegation to the caller only on success. */
652         if (!error)
653                 *dpp = ndp;
654         else if (ndp != NULL)
655                 free(ndp, M_NFSCLDELEG);
656         mbuf_freem(nd->nd_mrep);
657         return (error);
658 }
659
660 /*
661  * open downgrade rpc
662  */
663 APPLESTATIC int
664 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
665     struct ucred *cred, NFSPROC_T *p)
666 {
667         u_int32_t *tl;
668         struct nfsrv_descript nfsd, *nd = &nfsd;
669         int error;
670
671         NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp);
672         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
673         if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp))))
674                 *tl++ = 0;
675         else
676                 *tl++ = op->nfso_stateid.seqid;
677         *tl++ = op->nfso_stateid.other[0];
678         *tl++ = op->nfso_stateid.other[1];
679         *tl++ = op->nfso_stateid.other[2];
680         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
681         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
682         *tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
683         error = nfscl_request(nd, vp, p, cred, NULL);
684         if (error)
685                 return (error);
686         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
687         if (!nd->nd_repstat) {
688                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
689                 op->nfso_stateid.seqid = *tl++;
690                 op->nfso_stateid.other[0] = *tl++;
691                 op->nfso_stateid.other[1] = *tl++;
692                 op->nfso_stateid.other[2] = *tl;
693         }
694         if (nd->nd_repstat && error == 0)
695                 error = nd->nd_repstat;
696         if (error == NFSERR_STALESTATEID)
697                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
698 nfsmout:
699         mbuf_freem(nd->nd_mrep);
700         return (error);
701 }
702
703 /*
704  * V4 Close operation.
705  */
706 APPLESTATIC int
707 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
708 {
709         struct nfsclclient *clp;
710         int error;
711
712         if (vnode_vtype(vp) != VREG)
713                 return (0);
714         if (doclose)
715                 error = nfscl_doclose(vp, &clp, p);
716         else
717                 error = nfscl_getclose(vp, &clp);
718         if (error)
719                 return (error);
720
721         nfscl_clientrelease(clp);
722         return (0);
723 }
724
725 /*
726  * Close the open.
727  */
728 APPLESTATIC void
729 nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p)
730 {
731         struct nfsrv_descript nfsd, *nd = &nfsd;
732         struct nfscllockowner *lp, *nlp;
733         struct nfscllock *lop, *nlop;
734         struct ucred *tcred;
735         u_int64_t off = 0, len = 0;
736         u_int32_t type = NFSV4LOCKT_READ;
737         int error, do_unlock, trycnt;
738
739         tcred = newnfs_getcred();
740         newnfs_copycred(&op->nfso_cred, tcred);
741         /*
742          * (Theoretically this could be done in the same
743          *  compound as the close, but having multiple
744          *  sequenced Ops in the same compound might be
745          *  too scary for some servers.)
746          */
747         if (op->nfso_posixlock) {
748                 off = 0;
749                 len = NFS64BITSSET;
750                 type = NFSV4LOCKT_READ;
751         }
752
753         /*
754          * Since this function is only called from VOP_INACTIVE(), no
755          * other thread will be manipulating this Open. As such, the
756          * lock lists are not being changed by other threads, so it should
757          * be safe to do this without locking.
758          */
759         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
760                 do_unlock = 1;
761                 LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
762                         if (op->nfso_posixlock == 0) {
763                                 off = lop->nfslo_first;
764                                 len = lop->nfslo_end - lop->nfslo_first;
765                                 if (lop->nfslo_type == F_WRLCK)
766                                         type = NFSV4LOCKT_WRITE;
767                                 else
768                                         type = NFSV4LOCKT_READ;
769                         }
770                         if (do_unlock) {
771                                 trycnt = 0;
772                                 do {
773                                         error = nfsrpc_locku(nd, nmp, lp, off,
774                                             len, type, tcred, p, 0);
775                                         if ((nd->nd_repstat == NFSERR_GRACE ||
776                                             nd->nd_repstat == NFSERR_DELAY) &&
777                                             error == 0)
778                                                 (void) nfs_catnap(PZERO,
779                                                     (int)nd->nd_repstat,
780                                                     "nfs_close");
781                                 } while ((nd->nd_repstat == NFSERR_GRACE ||
782                                     nd->nd_repstat == NFSERR_DELAY) &&
783                                     error == 0 && trycnt++ < 5);
784                                 if (op->nfso_posixlock)
785                                         do_unlock = 0;
786                         }
787                         nfscl_freelock(lop, 0);
788                 }
789                 /*
790                  * Do a ReleaseLockOwner.
791                  * The lock owner name nfsl_owner may be used by other opens for
792                  * other files but the lock_owner4 name that nfsrpc_rellockown()
793                  * puts on the wire has the file handle for this file appended
794                  * to it, so it can be done now.
795                  */
796                 (void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
797                     lp->nfsl_open->nfso_fhlen, tcred, p);
798         }
799
800         /*
801          * There could be other Opens for different files on the same
802          * OpenOwner, so locking is required.
803          */
804         NFSLOCKCLSTATE();
805         nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
806         NFSUNLOCKCLSTATE();
807         do {
808                 error = nfscl_tryclose(op, tcred, nmp, p);
809                 if (error == NFSERR_GRACE)
810                         (void) nfs_catnap(PZERO, error, "nfs_close");
811         } while (error == NFSERR_GRACE);
812         NFSLOCKCLSTATE();
813         nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
814
815         LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
816                 nfscl_freelockowner(lp, 0);
817         nfscl_freeopen(op, 0);
818         NFSUNLOCKCLSTATE();
819         NFSFREECRED(tcred);
820 }
821
822 /*
823  * The actual Close RPC.
824  */
825 APPLESTATIC int
826 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
827     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
828     int syscred)
829 {
830         u_int32_t *tl;
831         int error;
832
833         nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
834             op->nfso_fhlen, NULL, NULL, 0, 0);
835         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
836         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
837         if (NFSHASNFSV4N(nmp))
838                 *tl++ = 0;
839         else
840                 *tl++ = op->nfso_stateid.seqid;
841         *tl++ = op->nfso_stateid.other[0];
842         *tl++ = op->nfso_stateid.other[1];
843         *tl = op->nfso_stateid.other[2];
844         if (syscred)
845                 nd->nd_flag |= ND_USEGSSNAME;
846         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
847             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
848         if (error)
849                 return (error);
850         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
851         if (nd->nd_repstat == 0)
852                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
853         error = nd->nd_repstat;
854         if (error == NFSERR_STALESTATEID)
855                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
856 nfsmout:
857         mbuf_freem(nd->nd_mrep);
858         return (error);
859 }
860
861 /*
862  * V4 Open Confirm RPC.
863  */
864 APPLESTATIC int
865 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
866     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
867 {
868         u_int32_t *tl;
869         struct nfsrv_descript nfsd, *nd = &nfsd;
870         struct nfsmount *nmp;
871         int error;
872
873         nmp = VFSTONFS(vnode_mount(vp));
874         if (NFSHASNFSV4N(nmp))
875                 return (0);             /* No confirmation for NFSv4.1. */
876         nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL,
877             0, 0);
878         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
879         *tl++ = op->nfso_stateid.seqid;
880         *tl++ = op->nfso_stateid.other[0];
881         *tl++ = op->nfso_stateid.other[1];
882         *tl++ = op->nfso_stateid.other[2];
883         *tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
884         error = nfscl_request(nd, vp, p, cred, NULL);
885         if (error)
886                 return (error);
887         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
888         if (!nd->nd_repstat) {
889                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
890                 op->nfso_stateid.seqid = *tl++;
891                 op->nfso_stateid.other[0] = *tl++;
892                 op->nfso_stateid.other[1] = *tl++;
893                 op->nfso_stateid.other[2] = *tl;
894         }
895         error = nd->nd_repstat;
896         if (error == NFSERR_STALESTATEID)
897                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
898 nfsmout:
899         mbuf_freem(nd->nd_mrep);
900         return (error);
901 }
902
903 /*
904  * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
905  * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
906  */
907 APPLESTATIC int
908 nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
909     struct ucred *cred, NFSPROC_T *p)
910 {
911         u_int32_t *tl;
912         struct nfsrv_descript nfsd;
913         struct nfsrv_descript *nd = &nfsd;
914         nfsattrbit_t attrbits;
915         u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
916         u_short port;
917         int error, isinet6 = 0, callblen;
918         nfsquad_t confirm;
919         u_int32_t lease;
920         static u_int32_t rev = 0;
921         struct nfsclds *dsp;
922         struct in6_addr a6;
923         struct nfsclsession *tsep;
924
925         if (nfsboottime.tv_sec == 0)
926                 NFSSETBOOTTIME(nfsboottime);
927         clp->nfsc_rev = rev++;
928         if (NFSHASNFSV4N(nmp)) {
929                 /*
930                  * Either there was no previous session or the
931                  * previous session has failed, so...
932                  * do an ExchangeID followed by the CreateSession.
933                  */
934                 error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq,
935                     NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p);
936                 NFSCL_DEBUG(1, "aft exch=%d\n", error);
937                 if (error == 0)
938                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
939                             &nmp->nm_sockreq,
940                             dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
941                 if (error == 0) {
942                         NFSLOCKMNT(nmp);
943                         /*
944                          * The old sessions cannot be safely free'd
945                          * here, since they may still be used by
946                          * in-progress RPCs.
947                          */
948                         tsep = NULL;
949                         if (TAILQ_FIRST(&nmp->nm_sess) != NULL)
950                                 tsep = NFSMNT_MDSSESSION(nmp);
951                         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
952                             nfsclds_list);
953                         /*
954                          * Wake up RPCs waiting for a slot on the
955                          * old session. These will then fail with
956                          * NFSERR_BADSESSION and be retried with the
957                          * new session by nfsv4_setsequence().
958                          * Also wakeup() processes waiting for the
959                          * new session.
960                          */
961                         if (tsep != NULL)
962                                 wakeup(&tsep->nfsess_slots);
963                         wakeup(&nmp->nm_sess);
964                         NFSUNLOCKMNT(nmp);
965                 } else
966                         nfscl_freenfsclds(dsp);
967                 NFSCL_DEBUG(1, "aft createsess=%d\n", error);
968                 if (error == 0 && reclaim == 0) {
969                         error = nfsrpc_reclaimcomplete(nmp, cred, p);
970                         NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
971                         if (error == NFSERR_COMPLETEALREADY ||
972                             error == NFSERR_NOTSUPP)
973                                 /* Ignore this error. */
974                                 error = 0;
975                 }
976                 return (error);
977         }
978
979         /*
980          * Allocate a single session structure for NFSv4.0, because some of
981          * the fields are used by NFSv4.0 although it doesn't do a session.
982          */
983         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
984         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
985         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
986         NFSLOCKMNT(nmp);
987         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
988         tsep = NFSMNT_MDSSESSION(nmp);
989         NFSUNLOCKMNT(nmp);
990
991         nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0);
992         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
993         *tl++ = txdr_unsigned(nfsboottime.tv_sec);
994         *tl = txdr_unsigned(clp->nfsc_rev);
995         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
996
997         /*
998          * set up the callback address
999          */
1000         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1001         *tl = txdr_unsigned(NFS_CALLBCKPROG);
1002         callblen = strlen(nfsv4_callbackaddr);
1003         if (callblen == 0)
1004                 cp = nfscl_getmyip(nmp, &a6, &isinet6);
1005         if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
1006             (callblen > 0 || cp != NULL)) {
1007                 port = htons(nfsv4_cbport);
1008                 cp2 = (u_int8_t *)&port;
1009 #ifdef INET6
1010                 if ((callblen > 0 &&
1011                      strchr(nfsv4_callbackaddr, ':')) || isinet6) {
1012                         char ip6buf[INET6_ADDRSTRLEN], *ip6add;
1013
1014                         (void) nfsm_strtom(nd, "tcp6", 4);
1015                         if (callblen == 0) {
1016                                 ip6_sprintf(ip6buf, (struct in6_addr *)cp);
1017                                 ip6add = ip6buf;
1018                         } else {
1019                                 ip6add = nfsv4_callbackaddr;
1020                         }
1021                         snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
1022                             ip6add, cp2[0], cp2[1]);
1023                 } else
1024 #endif
1025                 {
1026                         (void) nfsm_strtom(nd, "tcp", 3);
1027                         if (callblen == 0)
1028                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1029                                     "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
1030                                     cp[2], cp[3], cp2[0], cp2[1]);
1031                         else
1032                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1033                                     "%s.%d.%d", nfsv4_callbackaddr,
1034                                     cp2[0], cp2[1]);
1035                 }
1036                 (void) nfsm_strtom(nd, addr, strlen(addr));
1037         } else {
1038                 (void) nfsm_strtom(nd, "tcp", 3);
1039                 (void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
1040         }
1041         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1042         *tl = txdr_unsigned(clp->nfsc_cbident);
1043         nd->nd_flag |= ND_USEGSSNAME;
1044         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1045                 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1046         if (error)
1047                 return (error);
1048         if (nd->nd_repstat == 0) {
1049             NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1050             tsep->nfsess_clientid.lval[0] = *tl++;
1051             tsep->nfsess_clientid.lval[1] = *tl++;
1052             confirm.lval[0] = *tl++;
1053             confirm.lval[1] = *tl;
1054             mbuf_freem(nd->nd_mrep);
1055             nd->nd_mrep = NULL;
1056
1057             /*
1058              * and confirm it.
1059              */
1060             nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
1061                 NULL, 0, 0);
1062             NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1063             *tl++ = tsep->nfsess_clientid.lval[0];
1064             *tl++ = tsep->nfsess_clientid.lval[1];
1065             *tl++ = confirm.lval[0];
1066             *tl = confirm.lval[1];
1067             nd->nd_flag |= ND_USEGSSNAME;
1068             error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1069                 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1070             if (error)
1071                 return (error);
1072             mbuf_freem(nd->nd_mrep);
1073             nd->nd_mrep = NULL;
1074             if (nd->nd_repstat == 0) {
1075                 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh,
1076                     nmp->nm_fhsize, NULL, NULL, 0, 0);
1077                 NFSZERO_ATTRBIT(&attrbits);
1078                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1079                 (void) nfsrv_putattrbit(nd, &attrbits);
1080                 nd->nd_flag |= ND_USEGSSNAME;
1081                 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1082                     cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1083                 if (error)
1084                     return (error);
1085                 if (nd->nd_repstat == 0) {
1086                     error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL,
1087                         NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred);
1088                     if (error)
1089                         goto nfsmout;
1090                     clp->nfsc_renew = NFSCL_RENEW(lease);
1091                     clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1092                     clp->nfsc_clientidrev++;
1093                     if (clp->nfsc_clientidrev == 0)
1094                         clp->nfsc_clientidrev++;
1095                 }
1096             }
1097         }
1098         error = nd->nd_repstat;
1099 nfsmout:
1100         mbuf_freem(nd->nd_mrep);
1101         return (error);
1102 }
1103
1104 /*
1105  * nfs getattr call.
1106  */
1107 APPLESTATIC int
1108 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1109     struct nfsvattr *nap, void *stuff)
1110 {
1111         struct nfsrv_descript nfsd, *nd = &nfsd;
1112         int error;
1113         nfsattrbit_t attrbits;
1114         
1115         NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
1116         if (nd->nd_flag & ND_NFSV4) {
1117                 NFSGETATTR_ATTRBIT(&attrbits);
1118                 (void) nfsrv_putattrbit(nd, &attrbits);
1119         }
1120         error = nfscl_request(nd, vp, p, cred, stuff);
1121         if (error)
1122                 return (error);
1123         if (!nd->nd_repstat)
1124                 error = nfsm_loadattr(nd, nap);
1125         else
1126                 error = nd->nd_repstat;
1127         mbuf_freem(nd->nd_mrep);
1128         return (error);
1129 }
1130
1131 /*
1132  * nfs getattr call with non-vnode arguemnts.
1133  */
1134 APPLESTATIC int
1135 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1136     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1137     uint32_t *leasep)
1138 {
1139         struct nfsrv_descript nfsd, *nd = &nfsd;
1140         int error, vers = NFS_VER2;
1141         nfsattrbit_t attrbits;
1142         
1143         nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0);
1144         if (nd->nd_flag & ND_NFSV4) {
1145                 vers = NFS_VER4;
1146                 NFSGETATTR_ATTRBIT(&attrbits);
1147                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1148                 (void) nfsrv_putattrbit(nd, &attrbits);
1149         } else if (nd->nd_flag & ND_NFSV3) {
1150                 vers = NFS_VER3;
1151         }
1152         if (syscred)
1153                 nd->nd_flag |= ND_USEGSSNAME;
1154         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1155             NFS_PROG, vers, NULL, 1, xidp, NULL);
1156         if (error)
1157                 return (error);
1158         if (nd->nd_repstat == 0) {
1159                 if ((nd->nd_flag & ND_NFSV4) != 0)
1160                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1161                             NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1162                             NULL, NULL);
1163                 else
1164                         error = nfsm_loadattr(nd, nap);
1165         } else
1166                 error = nd->nd_repstat;
1167         mbuf_freem(nd->nd_mrep);
1168         return (error);
1169 }
1170
1171 /*
1172  * Do an nfs setattr operation.
1173  */
1174 APPLESTATIC int
1175 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1176     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
1177     void *stuff)
1178 {
1179         int error, expireret = 0, openerr, retrycnt;
1180         u_int32_t clidrev = 0, mode;
1181         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1182         struct nfsfh *nfhp;
1183         nfsv4stateid_t stateid;
1184         void *lckp;
1185
1186         if (nmp->nm_clp != NULL)
1187                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1188         if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1189                 mode = NFSV4OPEN_ACCESSWRITE;
1190         else
1191                 mode = NFSV4OPEN_ACCESSREAD;
1192         retrycnt = 0;
1193         do {
1194                 lckp = NULL;
1195                 openerr = 1;
1196                 if (NFSHASNFSV4(nmp)) {
1197                         nfhp = VTONFS(vp)->n_fhp;
1198                         error = nfscl_getstateid(vp, nfhp->nfh_fh,
1199                             nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1200                         if (error && vnode_vtype(vp) == VREG &&
1201                             (mode == NFSV4OPEN_ACCESSWRITE ||
1202                              nfstest_openallsetattr)) {
1203                                 /*
1204                                  * No Open stateid, so try and open the file
1205                                  * now.
1206                                  */
1207                                 if (mode == NFSV4OPEN_ACCESSWRITE)
1208                                         openerr = nfsrpc_open(vp, FWRITE, cred,
1209                                             p);
1210                                 else
1211                                         openerr = nfsrpc_open(vp, FREAD, cred,
1212                                             p);
1213                                 if (!openerr)
1214                                         (void) nfscl_getstateid(vp,
1215                                             nfhp->nfh_fh, nfhp->nfh_len,
1216                                             mode, 0, cred, p, &stateid, &lckp);
1217                         }
1218                 }
1219                 if (vap != NULL)
1220                         error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1221                             rnap, attrflagp, stuff);
1222                 else
1223                         error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
1224                             stuff);
1225                 if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1226                         NFSLOCKMNT(nmp);
1227                         nmp->nm_state |= NFSSTA_OPENMODE;
1228                         NFSUNLOCKMNT(nmp);
1229                 }
1230                 if (error == NFSERR_STALESTATEID)
1231                         nfscl_initiate_recovery(nmp->nm_clp);
1232                 if (lckp != NULL)
1233                         nfscl_lockderef(lckp);
1234                 if (!openerr)
1235                         (void) nfsrpc_close(vp, 0, p);
1236                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1237                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1238                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1239                         (void) nfs_catnap(PZERO, error, "nfs_setattr");
1240                 } else if ((error == NFSERR_EXPIRED ||
1241                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1242                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1243                 }
1244                 retrycnt++;
1245         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1246             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1247             error == NFSERR_BADSESSION ||
1248             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1249             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1250              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1251             (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1252              retrycnt < 4));
1253         if (error && retrycnt >= 4)
1254                 error = EIO;
1255         return (error);
1256 }
1257
1258 static int
1259 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1260     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1261     struct nfsvattr *rnap, int *attrflagp, void *stuff)
1262 {
1263         u_int32_t *tl;
1264         struct nfsrv_descript nfsd, *nd = &nfsd;
1265         int error;
1266         nfsattrbit_t attrbits;
1267
1268         *attrflagp = 0;
1269         NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp);
1270         if (nd->nd_flag & ND_NFSV4)
1271                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1272         vap->va_type = vnode_vtype(vp);
1273         nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1274         if (nd->nd_flag & ND_NFSV3) {
1275                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1276                 *tl = newnfs_false;
1277         } else if (nd->nd_flag & ND_NFSV4) {
1278                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1279                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1280                 NFSGETATTR_ATTRBIT(&attrbits);
1281                 (void) nfsrv_putattrbit(nd, &attrbits);
1282         }
1283         error = nfscl_request(nd, vp, p, cred, stuff);
1284         if (error)
1285                 return (error);
1286         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1287                 error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff);
1288         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1289                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1290         if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1291                 error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
1292         mbuf_freem(nd->nd_mrep);
1293         if (nd->nd_repstat && !error)
1294                 error = nd->nd_repstat;
1295         return (error);
1296 }
1297
1298 /*
1299  * nfs lookup rpc
1300  */
1301 APPLESTATIC int
1302 nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
1303     NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
1304     struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff)
1305 {
1306         u_int32_t *tl;
1307         struct nfsrv_descript nfsd, *nd = &nfsd;
1308         struct nfsmount *nmp;
1309         struct nfsnode *np;
1310         struct nfsfh *nfhp;
1311         nfsattrbit_t attrbits;
1312         int error = 0, lookupp = 0;
1313
1314         *attrflagp = 0;
1315         *dattrflagp = 0;
1316         if (vnode_vtype(dvp) != VDIR)
1317                 return (ENOTDIR);
1318         nmp = VFSTONFS(vnode_mount(dvp));
1319         if (len > NFS_MAXNAMLEN)
1320                 return (ENAMETOOLONG);
1321         if (NFSHASNFSV4(nmp) && len == 1 &&
1322                 name[0] == '.') {
1323                 /*
1324                  * Just return the current dir's fh.
1325                  */
1326                 np = VTONFS(dvp);
1327                 nfhp = malloc(sizeof (struct nfsfh) +
1328                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1329                 nfhp->nfh_len = np->n_fhp->nfh_len;
1330                 NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1331                 *nfhpp = nfhp;
1332                 return (0);
1333         }
1334         if (NFSHASNFSV4(nmp) && len == 2 &&
1335                 name[0] == '.' && name[1] == '.') {
1336                 lookupp = 1;
1337                 NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp);
1338         } else {
1339                 NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp);
1340                 (void) nfsm_strtom(nd, name, len);
1341         }
1342         if (nd->nd_flag & ND_NFSV4) {
1343                 NFSGETATTR_ATTRBIT(&attrbits);
1344                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1345                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
1346                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1347                 (void) nfsrv_putattrbit(nd, &attrbits);
1348         }
1349         error = nfscl_request(nd, dvp, p, cred, stuff);
1350         if (error)
1351                 return (error);
1352         if (nd->nd_repstat) {
1353                 /*
1354                  * When an NFSv4 Lookupp returns ENOENT, it means that
1355                  * the lookup is at the root of an fs, so return this dir.
1356                  */
1357                 if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
1358                     np = VTONFS(dvp);
1359                     nfhp = malloc(sizeof (struct nfsfh) +
1360                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1361                     nfhp->nfh_len = np->n_fhp->nfh_len;
1362                     NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1363                     *nfhpp = nfhp;
1364                     mbuf_freem(nd->nd_mrep);
1365                     return (0);
1366                 }
1367                 if (nd->nd_flag & ND_NFSV3)
1368                     error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1369                 else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1370                     ND_NFSV4) {
1371                         /* Load the directory attributes. */
1372                         error = nfsm_loadattr(nd, dnap);
1373                         if (error == 0)
1374                                 *dattrflagp = 1;
1375                 }
1376                 goto nfsmout;
1377         }
1378         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
1379                 /* Load the directory attributes. */
1380                 error = nfsm_loadattr(nd, dnap);
1381                 if (error != 0)
1382                         goto nfsmout;
1383                 *dattrflagp = 1;
1384                 /* Skip over the Lookup and GetFH operation status values. */
1385                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1386         }
1387         error = nfsm_getfh(nd, nfhpp);
1388         if (error)
1389                 goto nfsmout;
1390
1391         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1392         if ((nd->nd_flag & ND_NFSV3) && !error)
1393                 error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1394 nfsmout:
1395         mbuf_freem(nd->nd_mrep);
1396         if (!error && nd->nd_repstat)
1397                 error = nd->nd_repstat;
1398         return (error);
1399 }
1400
1401 /*
1402  * Do a readlink rpc.
1403  */
1404 APPLESTATIC int
1405 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1406     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1407 {
1408         u_int32_t *tl;
1409         struct nfsrv_descript nfsd, *nd = &nfsd;
1410         struct nfsnode *np = VTONFS(vp);
1411         nfsattrbit_t attrbits;
1412         int error, len, cangetattr = 1;
1413
1414         *attrflagp = 0;
1415         NFSCL_REQSTART(nd, NFSPROC_READLINK, vp);
1416         if (nd->nd_flag & ND_NFSV4) {
1417                 /*
1418                  * And do a Getattr op.
1419                  */
1420                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1421                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1422                 NFSGETATTR_ATTRBIT(&attrbits);
1423                 (void) nfsrv_putattrbit(nd, &attrbits);
1424         }
1425         error = nfscl_request(nd, vp, p, cred, stuff);
1426         if (error)
1427                 return (error);
1428         if (nd->nd_flag & ND_NFSV3)
1429                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1430         if (!nd->nd_repstat && !error) {
1431                 NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1432                 /*
1433                  * This seems weird to me, but must have been added to
1434                  * FreeBSD for some reason. The only thing I can think of
1435                  * is that there was/is some server that replies with
1436                  * more link data than it should?
1437                  */
1438                 if (len == NFS_MAXPATHLEN) {
1439                         NFSLOCKNODE(np);
1440                         if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1441                                 len = np->n_size;
1442                                 cangetattr = 0;
1443                         }
1444                         NFSUNLOCKNODE(np);
1445                 }
1446                 error = nfsm_mbufuio(nd, uiop, len);
1447                 if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1448                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1449         }
1450         if (nd->nd_repstat && !error)
1451                 error = nd->nd_repstat;
1452 nfsmout:
1453         mbuf_freem(nd->nd_mrep);
1454         return (error);
1455 }
1456
1457 /*
1458  * Read operation.
1459  */
1460 APPLESTATIC int
1461 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1462     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1463 {
1464         int error, expireret = 0, retrycnt;
1465         u_int32_t clidrev = 0;
1466         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1467         struct nfsnode *np = VTONFS(vp);
1468         struct ucred *newcred;
1469         struct nfsfh *nfhp = NULL;
1470         nfsv4stateid_t stateid;
1471         void *lckp;
1472
1473         if (nmp->nm_clp != NULL)
1474                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1475         newcred = cred;
1476         if (NFSHASNFSV4(nmp)) {
1477                 nfhp = np->n_fhp;
1478                 newcred = NFSNEWCRED(cred);
1479         }
1480         retrycnt = 0;
1481         do {
1482                 lckp = NULL;
1483                 if (NFSHASNFSV4(nmp))
1484                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1485                             NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1486                             &lckp);
1487                 error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1488                     attrflagp, stuff);
1489                 if (error == NFSERR_OPENMODE) {
1490                         NFSLOCKMNT(nmp);
1491                         nmp->nm_state |= NFSSTA_OPENMODE;
1492                         NFSUNLOCKMNT(nmp);
1493                 }
1494                 if (error == NFSERR_STALESTATEID)
1495                         nfscl_initiate_recovery(nmp->nm_clp);
1496                 if (lckp != NULL)
1497                         nfscl_lockderef(lckp);
1498                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1499                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1500                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1501                         (void) nfs_catnap(PZERO, error, "nfs_read");
1502                 } else if ((error == NFSERR_EXPIRED ||
1503                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1504                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1505                 }
1506                 retrycnt++;
1507         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1508             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1509             error == NFSERR_BADSESSION ||
1510             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1511             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1512              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1513             (error == NFSERR_OPENMODE && retrycnt < 4));
1514         if (error && retrycnt >= 4)
1515                 error = EIO;
1516         if (NFSHASNFSV4(nmp))
1517                 NFSFREECRED(newcred);
1518         return (error);
1519 }
1520
1521 /*
1522  * The actual read RPC.
1523  */
1524 static int
1525 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1526     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1527     int *attrflagp, void *stuff)
1528 {
1529         u_int32_t *tl;
1530         int error = 0, len, retlen, tsiz, eof = 0;
1531         struct nfsrv_descript nfsd;
1532         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1533         struct nfsrv_descript *nd = &nfsd;
1534         int rsize;
1535         off_t tmp_off;
1536
1537         *attrflagp = 0;
1538         tsiz = uio_uio_resid(uiop);
1539         tmp_off = uiop->uio_offset + tsiz;
1540         NFSLOCKMNT(nmp);
1541         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1542                 NFSUNLOCKMNT(nmp);
1543                 return (EFBIG);
1544         }
1545         rsize = nmp->nm_rsize;
1546         NFSUNLOCKMNT(nmp);
1547         nd->nd_mrep = NULL;
1548         while (tsiz > 0) {
1549                 *attrflagp = 0;
1550                 len = (tsiz > rsize) ? rsize : tsiz;
1551                 NFSCL_REQSTART(nd, NFSPROC_READ, vp);
1552                 if (nd->nd_flag & ND_NFSV4)
1553                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1554                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1555                 if (nd->nd_flag & ND_NFSV2) {
1556                         *tl++ = txdr_unsigned(uiop->uio_offset);
1557                         *tl++ = txdr_unsigned(len);
1558                         *tl = 0;
1559                 } else {
1560                         txdr_hyper(uiop->uio_offset, tl);
1561                         *(tl + 2) = txdr_unsigned(len);
1562                 }
1563                 /*
1564                  * Since I can't do a Getattr for NFSv4 for Write, there
1565                  * doesn't seem any point in doing one here, either.
1566                  * (See the comment in nfsrpc_writerpc() for more info.)
1567                  */
1568                 error = nfscl_request(nd, vp, p, cred, stuff);
1569                 if (error)
1570                         return (error);
1571                 if (nd->nd_flag & ND_NFSV3) {
1572                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1573                 } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1574                         error = nfsm_loadattr(nd, nap);
1575                         if (!error)
1576                                 *attrflagp = 1;
1577                 }
1578                 if (nd->nd_repstat || error) {
1579                         if (!error)
1580                                 error = nd->nd_repstat;
1581                         goto nfsmout;
1582                 }
1583                 if (nd->nd_flag & ND_NFSV3) {
1584                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1585                         eof = fxdr_unsigned(int, *(tl + 1));
1586                 } else if (nd->nd_flag & ND_NFSV4) {
1587                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1588                         eof = fxdr_unsigned(int, *tl);
1589                 }
1590                 NFSM_STRSIZ(retlen, len);
1591                 error = nfsm_mbufuio(nd, uiop, retlen);
1592                 if (error)
1593                         goto nfsmout;
1594                 mbuf_freem(nd->nd_mrep);
1595                 nd->nd_mrep = NULL;
1596                 tsiz -= retlen;
1597                 if (!(nd->nd_flag & ND_NFSV2)) {
1598                         if (eof || retlen == 0)
1599                                 tsiz = 0;
1600                 } else if (retlen < len)
1601                         tsiz = 0;
1602         }
1603         return (0);
1604 nfsmout:
1605         if (nd->nd_mrep != NULL)
1606                 mbuf_freem(nd->nd_mrep);
1607         return (error);
1608 }
1609
1610 /*
1611  * nfs write operation
1612  * When called_from_strategy != 0, it should return EIO for an error that
1613  * indicates recovery is in progress, so that the buffer will be left
1614  * dirty and be written back to the server later. If it loops around,
1615  * the recovery thread could get stuck waiting for the buffer and recovery
1616  * will then deadlock.
1617  */
1618 APPLESTATIC int
1619 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1620     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1621     void *stuff, int called_from_strategy)
1622 {
1623         int error, expireret = 0, retrycnt, nostateid;
1624         u_int32_t clidrev = 0;
1625         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1626         struct nfsnode *np = VTONFS(vp);
1627         struct ucred *newcred;
1628         struct nfsfh *nfhp = NULL;
1629         nfsv4stateid_t stateid;
1630         void *lckp;
1631
1632         *must_commit = 0;
1633         if (nmp->nm_clp != NULL)
1634                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1635         newcred = cred;
1636         if (NFSHASNFSV4(nmp)) {
1637                 newcred = NFSNEWCRED(cred);
1638                 nfhp = np->n_fhp;
1639         }
1640         retrycnt = 0;
1641         do {
1642                 lckp = NULL;
1643                 nostateid = 0;
1644                 if (NFSHASNFSV4(nmp)) {
1645                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1646                             NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1647                             &lckp);
1648                         if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1649                             stateid.other[2] == 0) {
1650                                 nostateid = 1;
1651                                 NFSCL_DEBUG(1, "stateid0 in write\n");
1652                         }
1653                 }
1654
1655                 /*
1656                  * If there is no stateid for NFSv4, it means this is an
1657                  * extraneous write after close. Basically a poorly
1658                  * implemented buffer cache. Just don't do the write.
1659                  */
1660                 if (nostateid)
1661                         error = 0;
1662                 else
1663                         error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1664                             newcred, &stateid, p, nap, attrflagp, stuff);
1665                 if (error == NFSERR_STALESTATEID)
1666                         nfscl_initiate_recovery(nmp->nm_clp);
1667                 if (lckp != NULL)
1668                         nfscl_lockderef(lckp);
1669                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1670                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1671                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1672                         (void) nfs_catnap(PZERO, error, "nfs_write");
1673                 } else if ((error == NFSERR_EXPIRED ||
1674                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1675                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1676                 }
1677                 retrycnt++;
1678         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
1679             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1680               error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
1681             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1682             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1683              expireret == 0 && clidrev != 0 && retrycnt < 4));
1684         if (error != 0 && (retrycnt >= 4 ||
1685             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1686               error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
1687                 error = EIO;
1688         if (NFSHASNFSV4(nmp))
1689                 NFSFREECRED(newcred);
1690         return (error);
1691 }
1692
1693 /*
1694  * The actual write RPC.
1695  */
1696 static int
1697 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
1698     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
1699     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1700 {
1701         u_int32_t *tl;
1702         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1703         struct nfsnode *np = VTONFS(vp);
1704         int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC;
1705         int wccflag = 0, wsize;
1706         int32_t backup;
1707         struct nfsrv_descript nfsd;
1708         struct nfsrv_descript *nd = &nfsd;
1709         nfsattrbit_t attrbits;
1710         off_t tmp_off;
1711
1712         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
1713         *attrflagp = 0;
1714         tsiz = uio_uio_resid(uiop);
1715         tmp_off = uiop->uio_offset + tsiz;
1716         NFSLOCKMNT(nmp);
1717         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1718                 NFSUNLOCKMNT(nmp);
1719                 return (EFBIG);
1720         }
1721         wsize = nmp->nm_wsize;
1722         NFSUNLOCKMNT(nmp);
1723         nd->nd_mrep = NULL;     /* NFSv2 sometimes does a write with */
1724         nd->nd_repstat = 0;     /* uio_resid == 0, so the while is not done */
1725         while (tsiz > 0) {
1726                 *attrflagp = 0;
1727                 len = (tsiz > wsize) ? wsize : tsiz;
1728                 NFSCL_REQSTART(nd, NFSPROC_WRITE, vp);
1729                 if (nd->nd_flag & ND_NFSV4) {
1730                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1731                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
1732                         txdr_hyper(uiop->uio_offset, tl);
1733                         tl += 2;
1734                         *tl++ = txdr_unsigned(*iomode);
1735                         *tl = txdr_unsigned(len);
1736                 } else if (nd->nd_flag & ND_NFSV3) {
1737                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
1738                         txdr_hyper(uiop->uio_offset, tl);
1739                         tl += 2;
1740                         *tl++ = txdr_unsigned(len);
1741                         *tl++ = txdr_unsigned(*iomode);
1742                         *tl = txdr_unsigned(len);
1743                 } else {
1744                         u_int32_t x;
1745
1746                         NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1747                         /*
1748                          * Not sure why someone changed this, since the
1749                          * RFC clearly states that "beginoffset" and
1750                          * "totalcount" are ignored, but it wouldn't
1751                          * surprise me if there's a busted server out there.
1752                          */
1753                         /* Set both "begin" and "current" to non-garbage. */
1754                         x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1755                         *tl++ = x;      /* "begin offset" */
1756                         *tl++ = x;      /* "current offset" */
1757                         x = txdr_unsigned(len);
1758                         *tl++ = x;      /* total to this offset */
1759                         *tl = x;        /* size of this write */
1760
1761                 }
1762                 nfsm_uiombuf(nd, uiop, len);
1763                 /*
1764                  * Although it is tempting to do a normal Getattr Op in the
1765                  * NFSv4 compound, the result can be a nearly hung client
1766                  * system if the Getattr asks for Owner and/or OwnerGroup.
1767                  * It occurs when the client can't map either the Owner or
1768                  * Owner_group name in the Getattr reply to a uid/gid. When
1769                  * there is a cache miss, the kernel does an upcall to the
1770                  * nfsuserd. Then, it can try and read the local /etc/passwd
1771                  * or /etc/group file. It can then block in getnewbuf(),
1772                  * waiting for dirty writes to be pushed to the NFS server.
1773                  * The only reason this doesn't result in a complete
1774                  * deadlock, is that the upcall times out and allows
1775                  * the write to complete. However, progress is so slow
1776                  * that it might just as well be deadlocked.
1777                  * As such, we get the rest of the attributes, but not
1778                  * Owner or Owner_group.
1779                  * nb: nfscl_loadattrcache() needs to be told that these
1780                  *     partial attributes from a write rpc are being
1781                  *     passed in, via a argument flag.
1782                  */
1783                 if (nd->nd_flag & ND_NFSV4) {
1784                         NFSWRITEGETATTR_ATTRBIT(&attrbits);
1785                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1786                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
1787                         (void) nfsrv_putattrbit(nd, &attrbits);
1788                 }
1789                 error = nfscl_request(nd, vp, p, cred, stuff);
1790                 if (error)
1791                         return (error);
1792                 if (nd->nd_repstat) {
1793                         /*
1794                          * In case the rpc gets retried, roll
1795                          * the uio fileds changed by nfsm_uiombuf()
1796                          * back.
1797                          */
1798                         uiop->uio_offset -= len;
1799                         uio_uio_resid_add(uiop, len);
1800                         uio_iov_base_add(uiop, -len);
1801                         uio_iov_len_add(uiop, len);
1802                 }
1803                 if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1804                         error = nfscl_wcc_data(nd, vp, nap, attrflagp,
1805                             &wccflag, stuff);
1806                         if (error)
1807                                 goto nfsmout;
1808                 }
1809                 if (!nd->nd_repstat) {
1810                         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1811                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
1812                                         + NFSX_VERF);
1813                                 rlen = fxdr_unsigned(int, *tl++);
1814                                 if (rlen == 0) {
1815                                         error = NFSERR_IO;
1816                                         goto nfsmout;
1817                                 } else if (rlen < len) {
1818                                         backup = len - rlen;
1819                                         uio_iov_base_add(uiop, -(backup));
1820                                         uio_iov_len_add(uiop, backup);
1821                                         uiop->uio_offset -= backup;
1822                                         uio_uio_resid_add(uiop, backup);
1823                                         len = rlen;
1824                                 }
1825                                 commit = fxdr_unsigned(int, *tl++);
1826
1827                                 /*
1828                                  * Return the lowest commitment level
1829                                  * obtained by any of the RPCs.
1830                                  */
1831                                 if (committed == NFSWRITE_FILESYNC)
1832                                         committed = commit;
1833                                 else if (committed == NFSWRITE_DATASYNC &&
1834                                         commit == NFSWRITE_UNSTABLE)
1835                                         committed = commit;
1836                                 NFSLOCKMNT(nmp);
1837                                 if (!NFSHASWRITEVERF(nmp)) {
1838                                         NFSBCOPY((caddr_t)tl,
1839                                             (caddr_t)&nmp->nm_verf[0],
1840                                             NFSX_VERF);
1841                                         NFSSETWRITEVERF(nmp);
1842                                 } else if (NFSBCMP(tl, nmp->nm_verf,
1843                                     NFSX_VERF)) {
1844                                         *must_commit = 1;
1845                                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
1846                                 }
1847                                 NFSUNLOCKMNT(nmp);
1848                         }
1849                         if (nd->nd_flag & ND_NFSV4)
1850                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1851                         if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
1852                                 error = nfsm_loadattr(nd, nap);
1853                                 if (!error)
1854                                         *attrflagp = NFS_LATTR_NOSHRINK;
1855                         }
1856                 } else {
1857                         error = nd->nd_repstat;
1858                 }
1859                 if (error)
1860                         goto nfsmout;
1861                 NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
1862                 mbuf_freem(nd->nd_mrep);
1863                 nd->nd_mrep = NULL;
1864                 tsiz -= len;
1865         }
1866 nfsmout:
1867         if (nd->nd_mrep != NULL)
1868                 mbuf_freem(nd->nd_mrep);
1869         *iomode = committed;
1870         if (nd->nd_repstat && !error)
1871                 error = nd->nd_repstat;
1872         return (error);
1873 }
1874
1875 /*
1876  * nfs mknod rpc
1877  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1878  * mode set to specify the file type and the size field for rdev.
1879  */
1880 APPLESTATIC int
1881 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1882     u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
1883     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1884     int *attrflagp, int *dattrflagp, void *dstuff)
1885 {
1886         u_int32_t *tl;
1887         int error = 0;
1888         struct nfsrv_descript nfsd, *nd = &nfsd;
1889         nfsattrbit_t attrbits;
1890
1891         *nfhpp = NULL;
1892         *attrflagp = 0;
1893         *dattrflagp = 0;
1894         if (namelen > NFS_MAXNAMLEN)
1895                 return (ENAMETOOLONG);
1896         NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp);
1897         if (nd->nd_flag & ND_NFSV4) {
1898                 if (vtyp == VBLK || vtyp == VCHR) {
1899                         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1900                         *tl++ = vtonfsv34_type(vtyp);
1901                         *tl++ = txdr_unsigned(NFSMAJOR(rdev));
1902                         *tl = txdr_unsigned(NFSMINOR(rdev));
1903                 } else {
1904                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1905                         *tl = vtonfsv34_type(vtyp);
1906                 }
1907         }
1908         (void) nfsm_strtom(nd, name, namelen);
1909         if (nd->nd_flag & ND_NFSV3) {
1910                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1911                 *tl = vtonfsv34_type(vtyp);
1912         }
1913         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1914                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
1915         if ((nd->nd_flag & ND_NFSV3) &&
1916             (vtyp == VCHR || vtyp == VBLK)) {
1917                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1918                 *tl++ = txdr_unsigned(NFSMAJOR(rdev));
1919                 *tl = txdr_unsigned(NFSMINOR(rdev));
1920         }
1921         if (nd->nd_flag & ND_NFSV4) {
1922                 NFSGETATTR_ATTRBIT(&attrbits);
1923                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1924                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
1925                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1926                 (void) nfsrv_putattrbit(nd, &attrbits);
1927         }
1928         if (nd->nd_flag & ND_NFSV2)
1929                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
1930         error = nfscl_request(nd, dvp, p, cred, dstuff);
1931         if (error)
1932                 return (error);
1933         if (nd->nd_flag & ND_NFSV4)
1934                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
1935         if (!nd->nd_repstat) {
1936                 if (nd->nd_flag & ND_NFSV4) {
1937                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1938                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1939                         if (error)
1940                                 goto nfsmout;
1941                 }
1942                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
1943                 if (error)
1944                         goto nfsmout;
1945         }
1946         if (nd->nd_flag & ND_NFSV3)
1947                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
1948         if (!error && nd->nd_repstat)
1949                 error = nd->nd_repstat;
1950 nfsmout:
1951         mbuf_freem(nd->nd_mrep);
1952         return (error);
1953 }
1954
1955 /*
1956  * nfs file create call
1957  * Mostly just call the approriate routine. (I separated out v4, so that
1958  * error recovery wouldn't be as difficult.)
1959  */
1960 APPLESTATIC int
1961 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1962     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
1963     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1964     int *attrflagp, int *dattrflagp, void *dstuff)
1965 {
1966         int error = 0, newone, expireret = 0, retrycnt, unlocked;
1967         struct nfsclowner *owp;
1968         struct nfscldeleg *dp;
1969         struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp));
1970         u_int32_t clidrev;
1971
1972         if (NFSHASNFSV4(nmp)) {
1973             retrycnt = 0;
1974             do {
1975                 dp = NULL;
1976                 error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
1977                     NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
1978                     NULL, 1);
1979                 if (error)
1980                         return (error);
1981                 if (nmp->nm_clp != NULL)
1982                         clidrev = nmp->nm_clp->nfsc_clientidrev;
1983                 else
1984                         clidrev = 0;
1985                 if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
1986                     nfs_numnfscbd == 0 || retrycnt > 0)
1987                         error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
1988                           fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
1989                           attrflagp, dattrflagp, dstuff, &unlocked);
1990                 else
1991                         error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
1992                           cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
1993                           attrflagp, dattrflagp, dstuff, &unlocked);
1994                 /*
1995                  * There is no need to invalidate cached attributes here,
1996                  * since new post-delegation issue attributes are always
1997                  * returned by nfsrpc_createv4() and these will update the
1998                  * attribute cache.
1999                  */
2000                 if (dp != NULL)
2001                         (void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
2002                             (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
2003                 nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
2004                 if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2005                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2006                     error == NFSERR_BADSESSION) {
2007                         (void) nfs_catnap(PZERO, error, "nfs_open");
2008                 } else if ((error == NFSERR_EXPIRED ||
2009                     error == NFSERR_BADSTATEID) && clidrev != 0) {
2010                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2011                         retrycnt++;
2012                 }
2013             } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2014                 error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2015                 error == NFSERR_BADSESSION ||
2016                 ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2017                  expireret == 0 && clidrev != 0 && retrycnt < 4));
2018             if (error && retrycnt >= 4)
2019                     error = EIO;
2020         } else {
2021                 error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
2022                     fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
2023                     dstuff);
2024         }
2025         return (error);
2026 }
2027
2028 /*
2029  * The create rpc for v2 and 3.
2030  */
2031 static int
2032 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2033     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2034     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2035     int *attrflagp, int *dattrflagp, void *dstuff)
2036 {
2037         u_int32_t *tl;
2038         int error = 0;
2039         struct nfsrv_descript nfsd, *nd = &nfsd;
2040
2041         *nfhpp = NULL;
2042         *attrflagp = 0;
2043         *dattrflagp = 0;
2044         if (namelen > NFS_MAXNAMLEN)
2045                 return (ENAMETOOLONG);
2046         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2047         (void) nfsm_strtom(nd, name, namelen);
2048         if (nd->nd_flag & ND_NFSV3) {
2049                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2050                 if (fmode & O_EXCL) {
2051                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2052                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2053                         *tl++ = cverf.lval[0];
2054                         *tl = cverf.lval[1];
2055                 } else {
2056                         *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2057                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
2058                 }
2059         } else {
2060                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2061         }
2062         error = nfscl_request(nd, dvp, p, cred, dstuff);
2063         if (error)
2064                 return (error);
2065         if (nd->nd_repstat == 0) {
2066                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2067                 if (error)
2068                         goto nfsmout;
2069         }
2070         if (nd->nd_flag & ND_NFSV3)
2071                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2072         if (nd->nd_repstat != 0 && error == 0)
2073                 error = nd->nd_repstat;
2074 nfsmout:
2075         mbuf_freem(nd->nd_mrep);
2076         return (error);
2077 }
2078
2079 static int
2080 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2081     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2082     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2083     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2084     int *dattrflagp, void *dstuff, int *unlockedp)
2085 {
2086         u_int32_t *tl;
2087         int error = 0, deleg, newone, ret, acesize, limitby;
2088         struct nfsrv_descript nfsd, *nd = &nfsd;
2089         struct nfsclopen *op;
2090         struct nfscldeleg *dp = NULL;
2091         struct nfsnode *np;
2092         struct nfsfh *nfhp;
2093         nfsattrbit_t attrbits;
2094         nfsv4stateid_t stateid;
2095         u_int32_t rflags;
2096         struct nfsmount *nmp;
2097         struct nfsclsession *tsep;
2098
2099         nmp = VFSTONFS(dvp->v_mount);
2100         np = VTONFS(dvp);
2101         *unlockedp = 0;
2102         *nfhpp = NULL;
2103         *dpp = NULL;
2104         *attrflagp = 0;
2105         *dattrflagp = 0;
2106         if (namelen > NFS_MAXNAMLEN)
2107                 return (ENAMETOOLONG);
2108         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2109         /*
2110          * For V4, this is actually an Open op.
2111          */
2112         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2113         *tl++ = txdr_unsigned(owp->nfsow_seqid);
2114         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2115             NFSV4OPEN_ACCESSREAD);
2116         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2117         tsep = nfsmnt_mdssession(nmp);
2118         *tl++ = tsep->nfsess_clientid.lval[0];
2119         *tl = tsep->nfsess_clientid.lval[1];
2120         (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
2121         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2122         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2123         if (fmode & O_EXCL) {
2124                 if (NFSHASNFSV4N(nmp)) {
2125                         if (NFSHASSESSPERSIST(nmp)) {
2126                                 /* Use GUARDED for persistent sessions. */
2127                                 *tl = txdr_unsigned(NFSCREATE_GUARDED);
2128                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2129                         } else {
2130                                 /* Otherwise, use EXCLUSIVE4_1. */
2131                                 *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2132                                 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2133                                 *tl++ = cverf.lval[0];
2134                                 *tl = cverf.lval[1];
2135                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2136                         }
2137                 } else {
2138                         /* NFSv4.0 */
2139                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2140                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2141                         *tl++ = cverf.lval[0];
2142                         *tl = cverf.lval[1];
2143                 }
2144         } else {
2145                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2146                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2147         }
2148         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2149         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2150         (void) nfsm_strtom(nd, name, namelen);
2151         /* Get the new file's handle and attributes. */
2152         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2153         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2154         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2155         NFSGETATTR_ATTRBIT(&attrbits);
2156         (void) nfsrv_putattrbit(nd, &attrbits);
2157         /* Get the directory's post-op attributes. */
2158         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2159         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2160         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2161         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2162         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2163         (void) nfsrv_putattrbit(nd, &attrbits);
2164         error = nfscl_request(nd, dvp, p, cred, dstuff);
2165         if (error)
2166                 return (error);
2167         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2168         if (nd->nd_repstat == 0) {
2169                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2170                     6 * NFSX_UNSIGNED);
2171                 stateid.seqid = *tl++;
2172                 stateid.other[0] = *tl++;
2173                 stateid.other[1] = *tl++;
2174                 stateid.other[2] = *tl;
2175                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
2176                 (void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2177                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2178                 deleg = fxdr_unsigned(int, *tl);
2179                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
2180                     deleg == NFSV4OPEN_DELEGATEWRITE) {
2181                         if (!(owp->nfsow_clp->nfsc_flags &
2182                               NFSCLFLAGS_FIRSTDELEG))
2183                                 owp->nfsow_clp->nfsc_flags |=
2184                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
2185                         dp = malloc(
2186                             sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2187                             M_NFSCLDELEG, M_WAITOK);
2188                         LIST_INIT(&dp->nfsdl_owner);
2189                         LIST_INIT(&dp->nfsdl_lock);
2190                         dp->nfsdl_clp = owp->nfsow_clp;
2191                         newnfs_copyincred(cred, &dp->nfsdl_cred);
2192                         nfscl_lockinit(&dp->nfsdl_rwlock);
2193                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2194                             NFSX_UNSIGNED);
2195                         dp->nfsdl_stateid.seqid = *tl++;
2196                         dp->nfsdl_stateid.other[0] = *tl++;
2197                         dp->nfsdl_stateid.other[1] = *tl++;
2198                         dp->nfsdl_stateid.other[2] = *tl++;
2199                         ret = fxdr_unsigned(int, *tl);
2200                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2201                                 dp->nfsdl_flags = NFSCLDL_WRITE;
2202                                 /*
2203                                  * Indicates how much the file can grow.
2204                                  */
2205                                 NFSM_DISSECT(tl, u_int32_t *,
2206                                     3 * NFSX_UNSIGNED);
2207                                 limitby = fxdr_unsigned(int, *tl++);
2208                                 switch (limitby) {
2209                                 case NFSV4OPEN_LIMITSIZE:
2210                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
2211                                         break;
2212                                 case NFSV4OPEN_LIMITBLOCKS:
2213                                         dp->nfsdl_sizelimit =
2214                                             fxdr_unsigned(u_int64_t, *tl++);
2215                                         dp->nfsdl_sizelimit *=
2216                                             fxdr_unsigned(u_int64_t, *tl);
2217                                         break;
2218                                 default:
2219                                         error = NFSERR_BADXDR;
2220                                         goto nfsmout;
2221                                 }
2222                         } else {
2223                                 dp->nfsdl_flags = NFSCLDL_READ;
2224                         }
2225                         if (ret)
2226                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
2227                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
2228                             &acesize, p);
2229                         if (error)
2230                                 goto nfsmout;
2231                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
2232                         error = NFSERR_BADXDR;
2233                         goto nfsmout;
2234                 }
2235                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2236                 if (error)
2237                         goto nfsmout;
2238                 /* Get rid of the PutFH and Getattr status values. */
2239                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2240                 /* Load the directory attributes. */
2241                 error = nfsm_loadattr(nd, dnap);
2242                 if (error)
2243                         goto nfsmout;
2244                 *dattrflagp = 1;
2245                 if (dp != NULL && *attrflagp) {
2246                         dp->nfsdl_change = nnap->na_filerev;
2247                         dp->nfsdl_modtime = nnap->na_mtime;
2248                         dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2249                 }
2250                 /*
2251                  * We can now complete the Open state.
2252                  */
2253                 nfhp = *nfhpp;
2254                 if (dp != NULL) {
2255                         dp->nfsdl_fhlen = nfhp->nfh_len;
2256                         NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2257                 }
2258                 /*
2259                  * Get an Open structure that will be
2260                  * attached to the OpenOwner, acquired already.
2261                  */
2262                 error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
2263                     (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2264                     cred, p, NULL, &op, &newone, NULL, 0);
2265                 if (error)
2266                         goto nfsmout;
2267                 op->nfso_stateid = stateid;
2268                 newnfs_copyincred(cred, &op->nfso_cred);
2269                 if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2270                     do {
2271                         ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2272                             nfhp->nfh_len, op, cred, p);
2273                         if (ret == NFSERR_DELAY)
2274                             (void) nfs_catnap(PZERO, ret, "nfs_create");
2275                     } while (ret == NFSERR_DELAY);
2276                     error = ret;
2277                 }
2278
2279                 /*
2280                  * If the server is handing out delegations, but we didn't
2281                  * get one because an OpenConfirm was required, try the
2282                  * Open again, to get a delegation. This is a harmless no-op,
2283                  * from a server's point of view.
2284                  */
2285                 if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2286                     (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2287                     !error && dp == NULL) {
2288                     do {
2289                         ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp,
2290                             np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2291                             nfhp->nfh_fh, nfhp->nfh_len,
2292                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2293                             name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2294                         if (ret == NFSERR_DELAY)
2295                             (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2296                     } while (ret == NFSERR_DELAY);
2297                     if (ret) {
2298                         if (dp != NULL) {
2299                                 free(dp, M_NFSCLDELEG);
2300                                 dp = NULL;
2301                         }
2302                         if (ret == NFSERR_STALECLIENTID ||
2303                             ret == NFSERR_STALEDONTRECOVER ||
2304                             ret == NFSERR_BADSESSION)
2305                                 error = ret;
2306                     }
2307                 }
2308                 nfscl_openrelease(nmp, op, error, newone);
2309                 *unlockedp = 1;
2310         }
2311         if (nd->nd_repstat != 0 && error == 0)
2312                 error = nd->nd_repstat;
2313         if (error == NFSERR_STALECLIENTID)
2314                 nfscl_initiate_recovery(owp->nfsow_clp);
2315 nfsmout:
2316         if (!error)
2317                 *dpp = dp;
2318         else if (dp != NULL)
2319                 free(dp, M_NFSCLDELEG);
2320         mbuf_freem(nd->nd_mrep);
2321         return (error);
2322 }
2323
2324 /*
2325  * Nfs remove rpc
2326  */
2327 APPLESTATIC int
2328 nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2329     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp,
2330     void *dstuff)
2331 {
2332         u_int32_t *tl;
2333         struct nfsrv_descript nfsd, *nd = &nfsd;
2334         struct nfsnode *np;
2335         struct nfsmount *nmp;
2336         nfsv4stateid_t dstateid;
2337         int error, ret = 0, i;
2338
2339         *dattrflagp = 0;
2340         if (namelen > NFS_MAXNAMLEN)
2341                 return (ENAMETOOLONG);
2342         nmp = VFSTONFS(vnode_mount(dvp));
2343 tryagain:
2344         if (NFSHASNFSV4(nmp) && ret == 0) {
2345                 ret = nfscl_removedeleg(vp, p, &dstateid);
2346                 if (ret == 1) {
2347                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp);
2348                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2349                             NFSX_UNSIGNED);
2350                         if (NFSHASNFSV4N(nmp))
2351                                 *tl++ = 0;
2352                         else
2353                                 *tl++ = dstateid.seqid;
2354                         *tl++ = dstateid.other[0];
2355                         *tl++ = dstateid.other[1];
2356                         *tl++ = dstateid.other[2];
2357                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2358                         np = VTONFS(dvp);
2359                         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2360                             np->n_fhp->nfh_len, 0);
2361                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2362                         *tl = txdr_unsigned(NFSV4OP_REMOVE);
2363                 }
2364         } else {
2365                 ret = 0;
2366         }
2367         if (ret == 0)
2368                 NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp);
2369         (void) nfsm_strtom(nd, name, namelen);
2370         error = nfscl_request(nd, dvp, p, cred, dstuff);
2371         if (error)
2372                 return (error);
2373         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2374                 /* For NFSv4, parse out any Delereturn replies. */
2375                 if (ret > 0 && nd->nd_repstat != 0 &&
2376                     (nd->nd_flag & ND_NOMOREDATA)) {
2377                         /*
2378                          * If the Delegreturn failed, try again without
2379                          * it. The server will Recall, as required.
2380                          */
2381                         mbuf_freem(nd->nd_mrep);
2382                         goto tryagain;
2383                 }
2384                 for (i = 0; i < (ret * 2); i++) {
2385                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2386                             ND_NFSV4) {
2387                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2388                             if (*(tl + 1))
2389                                 nd->nd_flag |= ND_NOMOREDATA;
2390                         }
2391                 }
2392                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2393         }
2394         if (nd->nd_repstat && !error)
2395                 error = nd->nd_repstat;
2396 nfsmout:
2397         mbuf_freem(nd->nd_mrep);
2398         return (error);
2399 }
2400
2401 /*
2402  * Do an nfs rename rpc.
2403  */
2404 APPLESTATIC int
2405 nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
2406     vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
2407     NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
2408     int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff)
2409 {
2410         u_int32_t *tl;
2411         struct nfsrv_descript nfsd, *nd = &nfsd;
2412         struct nfsmount *nmp;
2413         struct nfsnode *np;
2414         nfsattrbit_t attrbits;
2415         nfsv4stateid_t fdstateid, tdstateid;
2416         int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
2417         
2418         *fattrflagp = 0;
2419         *tattrflagp = 0;
2420         nmp = VFSTONFS(vnode_mount(fdvp));
2421         if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
2422                 return (ENAMETOOLONG);
2423 tryagain:
2424         if (NFSHASNFSV4(nmp) && ret == 0) {
2425                 ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
2426                     &tdstateid, &gottd, p);
2427                 if (gotfd && gottd) {
2428                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp);
2429                 } else if (gotfd) {
2430                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp);
2431                 } else if (gottd) {
2432                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp);
2433                 }
2434                 if (gotfd) {
2435                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2436                         if (NFSHASNFSV4N(nmp))
2437                                 *tl++ = 0;
2438                         else
2439                                 *tl++ = fdstateid.seqid;
2440                         *tl++ = fdstateid.other[0];
2441                         *tl++ = fdstateid.other[1];
2442                         *tl = fdstateid.other[2];
2443                         if (gottd) {
2444                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2445                                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2446                                 np = VTONFS(tvp);
2447                                 (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2448                                     np->n_fhp->nfh_len, 0);
2449                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2450                                 *tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
2451                         }
2452                 }
2453                 if (gottd) {
2454                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2455                         if (NFSHASNFSV4N(nmp))
2456                                 *tl++ = 0;
2457                         else
2458                                 *tl++ = tdstateid.seqid;
2459                         *tl++ = tdstateid.other[0];
2460                         *tl++ = tdstateid.other[1];
2461                         *tl = tdstateid.other[2];
2462                 }
2463                 if (ret > 0) {
2464                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2465                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2466                         np = VTONFS(fdvp);
2467                         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2468                             np->n_fhp->nfh_len, 0);
2469                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2470                         *tl = txdr_unsigned(NFSV4OP_SAVEFH);
2471                 }
2472         } else {
2473                 ret = 0;
2474         }
2475         if (ret == 0)
2476                 NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp);
2477         if (nd->nd_flag & ND_NFSV4) {
2478                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2479                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2480                 NFSWCCATTR_ATTRBIT(&attrbits);
2481                 (void) nfsrv_putattrbit(nd, &attrbits);
2482                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2483                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2484                 (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2485                     VTONFS(tdvp)->n_fhp->nfh_len, 0);
2486                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2487                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2488                 (void) nfsrv_putattrbit(nd, &attrbits);
2489                 nd->nd_flag |= ND_V4WCCATTR;
2490                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2491                 *tl = txdr_unsigned(NFSV4OP_RENAME);
2492         }
2493         (void) nfsm_strtom(nd, fnameptr, fnamelen);
2494         if (!(nd->nd_flag & ND_NFSV4))
2495                 (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2496                         VTONFS(tdvp)->n_fhp->nfh_len, 0);
2497         (void) nfsm_strtom(nd, tnameptr, tnamelen);
2498         error = nfscl_request(nd, fdvp, p, cred, fstuff);
2499         if (error)
2500                 return (error);
2501         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2502                 /* For NFSv4, parse out any Delereturn replies. */
2503                 if (ret > 0 && nd->nd_repstat != 0 &&
2504                     (nd->nd_flag & ND_NOMOREDATA)) {
2505                         /*
2506                          * If the Delegreturn failed, try again without
2507                          * it. The server will Recall, as required.
2508                          */
2509                         mbuf_freem(nd->nd_mrep);
2510                         goto tryagain;
2511                 }
2512                 for (i = 0; i < (ret * 2); i++) {
2513                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2514                             ND_NFSV4) {
2515                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2516                             if (*(tl + 1)) {
2517                                 if (i == 0 && ret > 1) {
2518                                     /*
2519                                      * If the Delegreturn failed, try again
2520                                      * without it. The server will Recall, as
2521                                      * required.
2522                                      * If ret > 1, the first iteration of this
2523                                      * loop is the second DelegReturn result.
2524                                      */
2525                                     mbuf_freem(nd->nd_mrep);
2526                                     goto tryagain;
2527                                 } else {
2528                                     nd->nd_flag |= ND_NOMOREDATA;
2529                                 }
2530                             }
2531                         }
2532                 }
2533                 /* Now, the first wcc attribute reply. */
2534                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2535                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2536                         if (*(tl + 1))
2537                                 nd->nd_flag |= ND_NOMOREDATA;
2538                 }
2539                 error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL,
2540                     fstuff);
2541                 /* and the second wcc attribute reply. */
2542                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
2543                     !error) {
2544                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2545                         if (*(tl + 1))
2546                                 nd->nd_flag |= ND_NOMOREDATA;
2547                 }
2548                 if (!error)
2549                         error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
2550                             NULL, tstuff);
2551         }
2552         if (nd->nd_repstat && !error)
2553                 error = nd->nd_repstat;
2554 nfsmout:
2555         mbuf_freem(nd->nd_mrep);
2556         return (error);
2557 }
2558
2559 /*
2560  * nfs hard link create rpc
2561  */
2562 APPLESTATIC int
2563 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
2564     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2565     struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff)
2566 {
2567         u_int32_t *tl;
2568         struct nfsrv_descript nfsd, *nd = &nfsd;
2569         nfsattrbit_t attrbits;
2570         int error = 0;
2571
2572         *attrflagp = 0;
2573         *dattrflagp = 0;
2574         if (namelen > NFS_MAXNAMLEN)
2575                 return (ENAMETOOLONG);
2576         NFSCL_REQSTART(nd, NFSPROC_LINK, vp);
2577         if (nd->nd_flag & ND_NFSV4) {
2578                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2579                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2580         }
2581         (void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh,
2582                 VTONFS(dvp)->n_fhp->nfh_len, 0);
2583         if (nd->nd_flag & ND_NFSV4) {
2584                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2585                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2586                 NFSWCCATTR_ATTRBIT(&attrbits);
2587                 (void) nfsrv_putattrbit(nd, &attrbits);
2588                 nd->nd_flag |= ND_V4WCCATTR;
2589                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2590                 *tl = txdr_unsigned(NFSV4OP_LINK);
2591         }
2592         (void) nfsm_strtom(nd, name, namelen);
2593         error = nfscl_request(nd, vp, p, cred, dstuff);
2594         if (error)
2595                 return (error);
2596         if (nd->nd_flag & ND_NFSV3) {
2597                 error = nfscl_postop_attr(nd, nap, attrflagp, dstuff);
2598                 if (!error)
2599                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2600                             NULL, dstuff);
2601         } else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2602                 /*
2603                  * First, parse out the PutFH and Getattr result.
2604                  */
2605                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2606                 if (!(*(tl + 1)))
2607                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2608                 if (*(tl + 1))
2609                         nd->nd_flag |= ND_NOMOREDATA;
2610                 /*
2611                  * Get the pre-op attributes.
2612                  */
2613                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2614         }
2615         if (nd->nd_repstat && !error)
2616                 error = nd->nd_repstat;
2617 nfsmout:
2618         mbuf_freem(nd->nd_mrep);
2619         return (error);
2620 }
2621
2622 /*
2623  * nfs symbolic link create rpc
2624  */
2625 APPLESTATIC int
2626 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, char *target,
2627     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2628     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2629     int *dattrflagp, void *dstuff)
2630 {
2631         u_int32_t *tl;
2632         struct nfsrv_descript nfsd, *nd = &nfsd;
2633         struct nfsmount *nmp;
2634         int slen, error = 0;
2635
2636         *nfhpp = NULL;
2637         *attrflagp = 0;
2638         *dattrflagp = 0;
2639         nmp = VFSTONFS(vnode_mount(dvp));
2640         slen = strlen(target);
2641         if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
2642                 return (ENAMETOOLONG);
2643         NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp);
2644         if (nd->nd_flag & ND_NFSV4) {
2645                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2646                 *tl = txdr_unsigned(NFLNK);
2647                 (void) nfsm_strtom(nd, target, slen);
2648         }
2649         (void) nfsm_strtom(nd, name, namelen);
2650         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2651                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2652         if (!(nd->nd_flag & ND_NFSV4))
2653                 (void) nfsm_strtom(nd, target, slen);
2654         if (nd->nd_flag & ND_NFSV2)
2655                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2656         error = nfscl_request(nd, dvp, p, cred, dstuff);
2657         if (error)
2658                 return (error);
2659         if (nd->nd_flag & ND_NFSV4)
2660                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2661         if ((nd->nd_flag & ND_NFSV3) && !error) {
2662                 if (!nd->nd_repstat)
2663                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2664                 if (!error)
2665                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2666                             NULL, dstuff);
2667         }
2668         if (nd->nd_repstat && !error)
2669                 error = nd->nd_repstat;
2670         mbuf_freem(nd->nd_mrep);
2671         /*
2672          * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2673          * Only do this if vfs.nfs.ignore_eexist is set.
2674          * Never do this for NFSv4.1 or later minor versions, since sessions
2675          * should guarantee "exactly once" RPC semantics.
2676          */
2677         if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2678             nmp->nm_minorvers == 0))
2679                 error = 0;
2680         return (error);
2681 }
2682
2683 /*
2684  * nfs make dir rpc
2685  */
2686 APPLESTATIC int
2687 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2688     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2689     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2690     int *dattrflagp, void *dstuff)
2691 {
2692         u_int32_t *tl;
2693         struct nfsrv_descript nfsd, *nd = &nfsd;
2694         nfsattrbit_t attrbits;
2695         int error = 0;
2696         struct nfsfh *fhp;
2697         struct nfsmount *nmp;
2698
2699         *nfhpp = NULL;
2700         *attrflagp = 0;
2701         *dattrflagp = 0;
2702         nmp = VFSTONFS(vnode_mount(dvp));
2703         fhp = VTONFS(dvp)->n_fhp;
2704         if (namelen > NFS_MAXNAMLEN)
2705                 return (ENAMETOOLONG);
2706         NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp);
2707         if (nd->nd_flag & ND_NFSV4) {
2708                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2709                 *tl = txdr_unsigned(NFDIR);
2710         }
2711         (void) nfsm_strtom(nd, name, namelen);
2712         nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2713         if (nd->nd_flag & ND_NFSV4) {
2714                 NFSGETATTR_ATTRBIT(&attrbits);
2715                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2716                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2717                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2718                 (void) nfsrv_putattrbit(nd, &attrbits);
2719                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2720                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2721                 (void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0);
2722                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2723                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2724                 (void) nfsrv_putattrbit(nd, &attrbits);
2725         }
2726         error = nfscl_request(nd, dvp, p, cred, dstuff);
2727         if (error)
2728                 return (error);
2729         if (nd->nd_flag & ND_NFSV4)
2730                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2731         if (!nd->nd_repstat && !error) {
2732                 if (nd->nd_flag & ND_NFSV4) {
2733                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2734                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2735                 }
2736                 if (!error)
2737                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2738                 if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
2739                         /* Get rid of the PutFH and Getattr status values. */
2740                         NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2741                         /* Load the directory attributes. */
2742                         error = nfsm_loadattr(nd, dnap);
2743                         if (error == 0)
2744                                 *dattrflagp = 1;
2745                 }
2746         }
2747         if ((nd->nd_flag & ND_NFSV3) && !error)
2748                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2749         if (nd->nd_repstat && !error)
2750                 error = nd->nd_repstat;
2751 nfsmout:
2752         mbuf_freem(nd->nd_mrep);
2753         /*
2754          * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2755          * Only do this if vfs.nfs.ignore_eexist is set.
2756          * Never do this for NFSv4.1 or later minor versions, since sessions
2757          * should guarantee "exactly once" RPC semantics.
2758          */
2759         if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2760             nmp->nm_minorvers == 0))
2761                 error = 0;
2762         return (error);
2763 }
2764
2765 /*
2766  * nfs remove directory call
2767  */
2768 APPLESTATIC int
2769 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
2770     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff)
2771 {
2772         struct nfsrv_descript nfsd, *nd = &nfsd;
2773         int error = 0;
2774
2775         *dattrflagp = 0;
2776         if (namelen > NFS_MAXNAMLEN)
2777                 return (ENAMETOOLONG);
2778         NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp);
2779         (void) nfsm_strtom(nd, name, namelen);
2780         error = nfscl_request(nd, dvp, p, cred, dstuff);
2781         if (error)
2782                 return (error);
2783         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2784                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2785         if (nd->nd_repstat && !error)
2786                 error = nd->nd_repstat;
2787         mbuf_freem(nd->nd_mrep);
2788         /*
2789          * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2790          */
2791         if (error == ENOENT)
2792                 error = 0;
2793         return (error);
2794 }
2795
2796 /*
2797  * Readdir rpc.
2798  * Always returns with either uio_resid unchanged, if you are at the
2799  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
2800  * filled in.
2801  * I felt this would allow caching of directory blocks more easily
2802  * than returning a partially filled block.
2803  * Directory offset cookies:
2804  * Oh my, what to do with them...
2805  * I can think of three ways to deal with them:
2806  * 1 - have the layer above these RPCs maintain a map between logical
2807  *     directory byte offsets and the NFS directory offset cookies
2808  * 2 - pass the opaque directory offset cookies up into userland
2809  *     and let the libc functions deal with them, via the system call
2810  * 3 - return them to userland in the "struct dirent", so future versions
2811  *     of libc can use them and do whatever is necessary to make things work
2812  *     above these rpc calls, in the meantime
2813  * For now, I do #3 by "hiding" the directory offset cookies after the
2814  * d_name field in struct dirent. This is space inside d_reclen that
2815  * will be ignored by anything that doesn't know about them.
2816  * The directory offset cookies are filled in as the last 8 bytes of
2817  * each directory entry, after d_name. Someday, the userland libc
2818  * functions may be able to use these. In the meantime, it satisfies
2819  * OpenBSD's requirements for cookies being returned.
2820  * It expects the directory offset cookie for the read to be in uio_offset
2821  * and returns the one for the next entry after this directory block in
2822  * there, as well.
      * NOTE(review): in the code below the starting cookie is read from
      * *cookiep and written back there; uio_offset is only compared with 0
      * and advanced by the number of bytes stored. Confirm which is intended.
      *
      * Arguments, as used by the code below:
      *   vp       - vnode of the directory being read
      *   uiop     - destination; must have exactly one iovec and a uio_resid
      *              that is a multiple of DIRBLKSIZ (asserted on entry)
      *   cookiep  - in: directory offset cookie to start reading at;
      *              out: cookie of the last entry copied into the buffer
      *   nap, attrflagp - handed to nfscl_postop_attr() for NFSv3 and NFSv4
      *              replies; *attrflagp is reset to 0 before every RPC
      *   eofp     - optional; when not NULL it is set as described by the
      *              comment near the end of the function
      *   cred, p, stuff - passed through to the RPC helper functions
      * Returns 0 or an error number. The nfsmout label frees any reply that
      * is still held; presumably NFSM_DISSECT() branches there on a short
      * reply, in addition to the explicit gotos.
2823  */
2824 APPLESTATIC int
2825 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
2826     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
2827     int *eofp, void *stuff)
2828 {
2829         int len, left;
2830         struct dirent *dp = NULL;
2831         u_int32_t *tl;
2832         nfsquad_t cookie, ncookie;
2833         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
2834         struct nfsnode *dnp = VTONFS(vp);
2835         struct nfsvattr nfsva;
2836         struct nfsrv_descript nfsd, *nd = &nfsd;
2837         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2838         int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
2839         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
2840         char *cp;
2841         nfsattrbit_t attrbits, dattrbits;
2842         u_int32_t rderr, *tl2 = NULL;
2843         size_t tresid;
2844
2845         KASSERT(uiop->uio_iovcnt == 1 &&
2846             (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
2847             ("nfs readdirrpc bad uio"));
2848         ncookie.lval[0] = ncookie.lval[1] = 0;
2849         /*
2850          * There is no point in reading a lot more than uio_resid, however
2851          * adding one additional DIRBLKSIZ makes sense. Since uio_resid
2852          * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
2853          * will never make readsize > nm_readdirsize.
2854          */
2855         readsize = nmp->nm_readdirsize;
2856         if (readsize > uio_uio_resid(uiop))
2857                 readsize = uio_uio_resid(uiop) + DIRBLKSIZ;
2858
2859         *attrflagp = 0;
2860         if (eofp)
2861                 *eofp = 0;
             /* Remember the starting resid so "no data stored" can be detected. */
2862         tresid = uio_uio_resid(uiop);
2863         cookie.lval[0] = cookiep->nfsuquad[0];
2864         cookie.lval[1] = cookiep->nfsuquad[1];
             /* No reply is held yet; nfsmout only frees nd_mrep when non-NULL. */
2865         nd->nd_mrep = NULL;
2866
2867         /*
2868          * For NFSv4, first create the "." and ".." entries.
2869          */
2870         if (NFSHASNFSV4(nmp)) {
                     /*
                      * Words built for the readdir arguments below: cookie (2),
                      * cookie verifier (2) and readsize written twice.
                      */
2871                 reqsize = 6 * NFSX_UNSIGNED;
2872                 NFSGETATTR_ATTRBIT(&dattrbits);
2873                 NFSZERO_ATTRBIT(&attrbits);
2874                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
2875                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
2876                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
2877                     NFSATTRBIT_MOUNTEDONFILEID)) {
2878                         NFSSETBIT_ATTRBIT(&attrbits,
2879                             NFSATTRBIT_MOUNTEDONFILEID);
2880                         gotmnton = 1;
2881                 } else {
2882                         /*
2883                          * Must fake it. Use the fileno, except when the
2884                          * fsid is != to that of the directory. For that
2885                          * case, generate a fake fileno that is not the same.
2886                          */
2887                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
2888                         gotmnton = 0;
2889                 }
2890
2891                 /*
2892                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
2893                  */
2894                 if (uiop->uio_offset == 0) {
2895                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
2896                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2897                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2898                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2899                         (void) nfsrv_putattrbit(nd, &attrbits);
2900                         error = nfscl_request(nd, vp, p, cred, stuff);
2901                         if (error)
2902                             return (error);
2903                         dotfileid = 0;  /* Fake out the compiler. */
2904                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
2905                             error = nfsm_loadattr(nd, &nfsva);
2906                             if (error != 0)
2907                                 goto nfsmout;
2908                             dotfileid = nfsva.na_fileid;
2909                         }
2910                         if (nd->nd_repstat == 0) {
                                 /*
                                  * The fifth word is used as the length of the
                                  * file handle that follows; the handle itself
                                  * is skipped, since only attributes are needed.
                                  */
2911                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2912                             len = fxdr_unsigned(int, *(tl + 4));
2913                             if (len > 0 && len <= NFSX_V4FHMAX)
2914                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
2915                             else
2916                                 error = EPERM;
2917                             if (!error) {
2918                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
2919                                 nfsva.na_mntonfileno = UINT64_MAX;
2920                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
2921                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
2922                                     NULL, NULL, NULL, p, cred);
2923                                 if (error) {
2924                                     dotdotfileid = dotfileid;
2925                                 } else if (gotmnton) {
2926                                     if (nfsva.na_mntonfileno != UINT64_MAX)
2927                                         dotdotfileid = nfsva.na_mntonfileno;
2928                                     else
2929                                         dotdotfileid = nfsva.na_fileid;
2930                                 } else if (nfsva.na_filesid[0] ==
2931                                     dnp->n_vattr.na_filesid[0] &&
2932                                     nfsva.na_filesid[1] ==
2933                                     dnp->n_vattr.na_filesid[1]) {
2934                                     dotdotfileid = nfsva.na_fileid;
2935                                 } else {
2936                                     do {
2937                                         fakefileno--;
2938                                     } while (fakefileno ==
2939                                         nfsva.na_fileid);
2940                                     dotdotfileid = fakefileno;
2941                                 }
2942                             }
2943                         } else if (nd->nd_repstat == NFSERR_NOENT) {
2944                             /*
2945                              * Lookupp returns NFSERR_NOENT when we are
2946                              * at the root, so just use the current dir.
2947                              */
2948                             nd->nd_repstat = 0;
2949                             dotdotfileid = dotfileid;
2950                         } else {
2951                             error = nd->nd_repstat;
2952                         }
2953                         mbuf_freem(nd->nd_mrep);
2954                         if (error)
2955                             return (error);
2956                         nd->nd_mrep = NULL;
                             /* Store the "." record at the start of the buffer. */
2957                         dp = (struct dirent *)uio_iov_base(uiop);
2958                         dp->d_off = 0;
2959                         dp->d_type = DT_DIR;
2960                         dp->d_fileno = dotfileid;
2961                         dp->d_namlen = 1;
2962                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
2963                         dp->d_name[0] = '.';
2964                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
2965                         /*
2966                          * Just make these offset cookie 0.
2967                          */
2968                         tl = (u_int32_t *)&dp->d_name[8];
2969                         *tl++ = 0;
2970                         *tl = 0;
2971                         blksiz += dp->d_reclen;
2972                         uio_uio_resid_add(uiop, -(dp->d_reclen));
2973                         uiop->uio_offset += dp->d_reclen;
2974                         uio_iov_base_add(uiop, dp->d_reclen);
2975                         uio_iov_len_add(uiop, -(dp->d_reclen));
                             /* Then the ".." record, laid out the same way. */
2976                         dp = (struct dirent *)uio_iov_base(uiop);
2977                         dp->d_off = 0;
2978                         dp->d_type = DT_DIR;
2979                         dp->d_fileno = dotdotfileid;
2980                         dp->d_namlen = 2;
2981                         *((uint64_t *)dp->d_name) = 0;
2982                         dp->d_name[0] = '.';
2983                         dp->d_name[1] = '.';
2984                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
2985                         /*
2986                          * Just make these offset cookie 0.
2987                          */
2988                         tl = (u_int32_t *)&dp->d_name[8];
2989                         *tl++ = 0;
2990                         *tl = 0;
2991                         blksiz += dp->d_reclen;
2992                         uio_uio_resid_add(uiop, -(dp->d_reclen));
2993                         uiop->uio_offset += dp->d_reclen;
2994                         uio_iov_base_add(uiop, dp->d_reclen);
2995                         uio_iov_len_add(uiop, -(dp->d_reclen));
2996                 }
2997                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
2998         } else {
                     /* As for NFSv4 above, but readsize is written only once. */
2999                 reqsize = 5 * NFSX_UNSIGNED;
3000         }
3001
3002
3003         /*
3004          * Loop around doing readdir rpc's of size readsize.
3005          * The stopping criteria is EOF or buffer full.
3006          */
3007         while (more_dirs && bigenough) {
3008                 *attrflagp = 0;
3009                 NFSCL_REQSTART(nd, NFSPROC_READDIR, vp);
3010                 if (nd->nd_flag & ND_NFSV2) {
3011                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3012                         *tl++ = cookie.lval[1];
3013                         *tl = txdr_unsigned(readsize);
3014                 } else {
3015                         NFSM_BUILD(tl, u_int32_t *, reqsize);
3016                         *tl++ = cookie.lval[0];
3017                         *tl++ = cookie.lval[1];
                             /*
                              * A zero cookie is sent with a zero cookie verifier;
                              * otherwise the verifier saved in the nfsnode from
                              * an earlier reply is sent.
                              */
3018                         if (cookie.qval == 0) {
3019                                 *tl++ = 0;
3020                                 *tl++ = 0;
3021                         } else {
3022                                 NFSLOCKNODE(dnp);
3023                                 *tl++ = dnp->n_cookieverf.nfsuquad[0];
3024                                 *tl++ = dnp->n_cookieverf.nfsuquad[1];
3025                                 NFSUNLOCKNODE(dnp);
3026                         }
3027                         if (nd->nd_flag & ND_NFSV4) {
3028                                 *tl++ = txdr_unsigned(readsize);
3029                                 *tl = txdr_unsigned(readsize);
3030                                 (void) nfsrv_putattrbit(nd, &attrbits);
3031                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3032                                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3033                                 (void) nfsrv_putattrbit(nd, &dattrbits);
3034                         } else {
3035                                 *tl = txdr_unsigned(readsize);
3036                         }
3037                 }
3038                 error = nfscl_request(nd, vp, p, cred, stuff);
3039                 if (error)
3040                         return (error);
3041                 if (!(nd->nd_flag & ND_NFSV2)) {
3042                         if (nd->nd_flag & ND_NFSV3)
3043                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3044                                     stuff);
3045                         if (!nd->nd_repstat && !error) {
                                     /* Save the cookie verifier from this reply. */
3046                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
3047                                 NFSLOCKNODE(dnp);
3048                                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3049                                 dnp->n_cookieverf.nfsuquad[1] = *tl;
3050                                 NFSUNLOCKNODE(dnp);
3051                         }
3052                 }
3053                 if (nd->nd_repstat || error) {
3054                         if (!error)
3055                                 error = nd->nd_repstat;
3056                         goto nfsmout;
3057                 }
                     /*
                      * First "entry follows" boolean. If the reply holds no
                      * entries at all, clear tryformoredirs so that the outer
                      * loop ends whatever the eof flag says.
                      */
3058                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3059                 more_dirs = fxdr_unsigned(int, *tl);
3060                 if (!more_dirs)
3061                         tryformoredirs = 0;
3062         
3063                 /* loop through the dir entries, doctoring them to 4bsd form */
3064                 while (more_dirs && bigenough) {
3065                         if (nd->nd_flag & ND_NFSV4) {
3066                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3067                                 ncookie.lval[0] = *tl++;
3068                                 ncookie.lval[1] = *tl++;
3069                                 len = fxdr_unsigned(int, *tl);
3070                         } else if (nd->nd_flag & ND_NFSV3) {
3071                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3072                                 nfsva.na_fileid = fxdr_hyper(tl);
3073                                 tl += 2;
3074                                 len = fxdr_unsigned(int, *tl);
3075                         } else {
3076                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3077                                 nfsva.na_fileid = fxdr_unsigned(uint64_t,
3078                                     *tl++);
3079                                 len = fxdr_unsigned(int, *tl);
3080                         }
3081                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3082                                 error = EBADRPC;
3083                                 goto nfsmout;
3084                         }
3085                         tlen = roundup2(len, 8);
3086                         if (tlen == len)
3087                                 tlen += 8;  /* To ensure null termination. */
                             /*
                              * If this record does not fit in what is left of the
                              * current DIRBLKSIZ block, stretch the previous
                              * record to the end of the block and start a new one.
                              */
3088                         left = DIRBLKSIZ - blksiz;
3089                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3090                                 dp->d_reclen += left;
3091                                 uio_iov_base_add(uiop, left);
3092                                 uio_iov_len_add(uiop, -(left));
3093                                 uio_uio_resid_add(uiop, -(left));
3094                                 uiop->uio_offset += left;
3095                                 blksiz = 0;
3096                         }
                             /*
                              * Out of room in the caller's buffer: nothing more is
                              * stored, but the rest of this entry is still parsed
                              * before both loops end.
                              */
3097                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3098                             uio_uio_resid(uiop))
3099                                 bigenough = 0;
3100                         if (bigenough) {
3101                                 dp = (struct dirent *)uio_iov_base(uiop);
3102                                 dp->d_off = 0;
3103                                 dp->d_namlen = len;
3104                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
3105                                     NFSX_HYPER;
3106                                 dp->d_type = DT_UNKNOWN;
3107                                 blksiz += dp->d_reclen;
3108                                 if (blksiz == DIRBLKSIZ)
3109                                         blksiz = 0;
                                     /*
                                      * Step the uio past the fixed part of the
                                      * record (presumably up to d_name - confirm
                                      * DIRHDSIZ) and copy the name in from the reply.
                                      */
3110                                 uio_uio_resid_add(uiop, -(DIRHDSIZ));
3111                                 uiop->uio_offset += DIRHDSIZ;
3112                                 uio_iov_base_add(uiop, DIRHDSIZ);
3113                                 uio_iov_len_add(uiop, -(DIRHDSIZ));
3114                                 error = nfsm_mbufuio(nd, uiop, len);
3115                                 if (error)
3116                                         goto nfsmout;
                                     /*
                                      * tlen becomes the padding after the name; the
                                      * 8 byte cookie slot (tl2) follows the padding.
                                      */
3117                                 cp = uio_iov_base(uiop);
3118                                 tlen -= len;
3119                                 *cp = '\0';     /* null terminate */
3120                                 cp += tlen;     /* points to cookie storage */
3121                                 tl2 = (u_int32_t *)cp;
3122                                 uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
3123                                 uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
3124                                 uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
3125                                 uiop->uio_offset += (tlen + NFSX_HYPER);
3126                         } else {
                                     /* Not stored: skip over the name in the reply. */
3127                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3128                                 if (error)
3129                                         goto nfsmout;
3130                         }
                             /*
                              * Parse what follows the name. In each case tl is left
                              * pointing at the next "entry follows" boolean.
                              */
3131                         if (nd->nd_flag & ND_NFSV4) {
3132                                 rderr = 0;
3133                                 nfsva.na_mntonfileno = UINT64_MAX;
3134                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3135                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3136                                     NULL, NULL, &rderr, p, cred);
3137                                 if (error)
3138                                         goto nfsmout;
3139                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3140                         } else if (nd->nd_flag & ND_NFSV3) {
3141                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3142                                 ncookie.lval[0] = *tl++;
3143                                 ncookie.lval[1] = *tl++;
3144                         } else {
3145                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3146                                 ncookie.lval[0] = 0;
3147                                 ncookie.lval[1] = *tl++;
3148                         }
3149                         if (bigenough) {
3150                             if (nd->nd_flag & ND_NFSV4) {
3151                                 if (rderr) {
3152                                     dp->d_fileno = 0;
3153                                 } else {
3154                                     if (gotmnton) {
3155                                         if (nfsva.na_mntonfileno != UINT64_MAX)
3156                                             dp->d_fileno = nfsva.na_mntonfileno;
3157                                         else
3158                                             dp->d_fileno = nfsva.na_fileid;
3159                                     } else if (nfsva.na_filesid[0] ==
3160                                         dnp->n_vattr.na_filesid[0] &&
3161                                         nfsva.na_filesid[1] ==
3162                                         dnp->n_vattr.na_filesid[1]) {
3163                                         dp->d_fileno = nfsva.na_fileid;
3164                                     } else {
3165                                         do {
3166                                             fakefileno--;
3167                                         } while (fakefileno ==
3168                                             nfsva.na_fileid);
3169                                         dp->d_fileno = fakefileno;
3170                                     }
3171                                     dp->d_type = vtonfs_dtype(nfsva.na_type);
3172                                 }
3173                             } else {
3174                                 dp->d_fileno = nfsva.na_fileid;
3175                             }
                                 /*
                                  * Store this entry's cookie after its name, hand
                                  * it back through cookiep and make it the
                                  * starting cookie for the next RPC.
                                  */
3176                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3177                                 ncookie.lval[0];
3178                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3179                                 ncookie.lval[1];
3180                         }
3181                         more_dirs = fxdr_unsigned(int, *tl);
3182                 }
3183                 /*
3184                  * If at end of rpc data, get the eof boolean
3185                  */
3186                 if (!more_dirs) {
3187                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3188                         eof = fxdr_unsigned(int, *tl);
3189                         if (tryformoredirs)
3190                                 more_dirs = !eof;
3191                         if (nd->nd_flag & ND_NFSV4) {
3192                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3193                                     stuff);
3194                                 if (error)
3195                                         goto nfsmout;
3196                         }
3197                 }
3198                 mbuf_freem(nd->nd_mrep);
3199                 nd->nd_mrep = NULL;
3200         }
3201         /*
3202          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3203          * by increasing d_reclen for the last record.
3204          */
3205         if (blksiz > 0) {
3206                 left = DIRBLKSIZ - blksiz;
3207                 dp->d_reclen += left;
3208                 uio_iov_base_add(uiop, left);
3209                 uio_iov_len_add(uiop, -(left));
3210                 uio_uio_resid_add(uiop, -(left));
3211                 uiop->uio_offset += left;
3212         }
3213
3214         /*
3215          * If returning no data, assume end of file.
3216          * If not bigenough, return not end of file, since you aren't
3217          *    returning all the data
3218          * Otherwise, return the eof flag from the server.
3219          */
3220         if (eofp) {
3221                 if (tresid == ((size_t)(uio_uio_resid(uiop))))
3222                         *eofp = 1;
3223                 else if (!bigenough)
3224                         *eofp = 0;
3225                 else
3226                         *eofp = eof;
3227         }
3228
3229         /*
3230          * Add extra empty records to any remaining DIRBLKSIZ chunks.
              * Each one is a whole DIRBLKSIZ record with no name and a fileno
              * of 0, carrying the current cookie at d_name[4]. Nothing is
              * added when no data at all was stored (resid still == tresid).
3231          */
3232         while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
3233                 dp = (struct dirent *)uio_iov_base(uiop);
3234                 dp->d_type = DT_UNKNOWN;
3235                 dp->d_fileno = 0;
3236                 dp->d_namlen = 0;
3237                 dp->d_name[0] = '\0';
3238                 tl = (u_int32_t *)&dp->d_name[4];
3239                 *tl++ = cookie.lval[0];
3240                 *tl = cookie.lval[1];
3241                 dp->d_reclen = DIRBLKSIZ;
3242                 uio_iov_base_add(uiop, DIRBLKSIZ);
3243                 uio_iov_len_add(uiop, -(DIRBLKSIZ));
3244                 uio_uio_resid_add(uiop, -(DIRBLKSIZ));
3245                 uiop->uio_offset += DIRBLKSIZ;
3246         }
3247
3248 nfsmout:
3249         if (nd->nd_mrep != NULL)
3250                 mbuf_freem(nd->nd_mrep);
3251         return (error);
3252 }
3253
3254 #ifndef APPLE
3255 /*
3256  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3257  * (Also used for NFS V4 when mount flag set.)
3258  * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
3259  */
3260 APPLESTATIC int
3261 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3262     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3263     int *eofp, void *stuff)
3264 {
3265         int len, left;
3266         struct dirent *dp = NULL;
3267         u_int32_t *tl;
3268         vnode_t newvp = NULLVP;
3269         struct nfsrv_descript nfsd, *nd = &nfsd;
3270         struct nameidata nami, *ndp = &nami;
3271         struct componentname *cnp = &ndp->ni_cnd;
3272         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3273         struct nfsnode *dnp = VTONFS(vp), *np;
3274         struct nfsvattr nfsva;
3275         struct nfsfh *nfhp;
3276         nfsquad_t cookie, ncookie;
3277         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3278         int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3279         int isdotdot = 0, unlocknewvp = 0;
3280         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3281         u_int64_t fileno = 0;
3282         char *cp;
3283         nfsattrbit_t attrbits, dattrbits;
3284         size_t tresid;
3285         u_int32_t *tl2 = NULL, rderr;
3286         struct timespec dctime;
3287
3288         KASSERT(uiop->uio_iovcnt == 1 &&
3289             (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
3290             ("nfs readdirplusrpc bad uio"));
3291         ncookie.lval[0] = ncookie.lval[1] = 0;
3292         timespecclear(&dctime);
3293         *attrflagp = 0;
3294         if (eofp != NULL)
3295                 *eofp = 0;
3296         ndp->ni_dvp = vp;
3297         nd->nd_mrep = NULL;
3298         cookie.lval[0] = cookiep->nfsuquad[0];
3299         cookie.lval[1] = cookiep->nfsuquad[1];
3300         tresid = uio_uio_resid(uiop);
3301
3302         /*
3303          * For NFSv4, first create the "." and ".." entries.
3304          */
3305         if (NFSHASNFSV4(nmp)) {
3306                 NFSGETATTR_ATTRBIT(&dattrbits);
3307                 NFSZERO_ATTRBIT(&attrbits);
3308                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3309                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3310                     NFSATTRBIT_MOUNTEDONFILEID)) {
3311                         NFSSETBIT_ATTRBIT(&attrbits,
3312                             NFSATTRBIT_MOUNTEDONFILEID);
3313                         gotmnton = 1;
3314                 } else {
3315                         /*
3316                          * Must fake it. Use the fileno, except when the
3317                          * fsid is != to that of the directory. For that
3318                          * case, generate a fake fileno that is not the same.
3319                          */
3320                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3321                         gotmnton = 0;
3322                 }
3323
3324                 /*
3325                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3326                  */
3327                 if (uiop->uio_offset == 0) {
3328                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
3329                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3330                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
3331                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3332                         (void) nfsrv_putattrbit(nd, &attrbits);
3333                         error = nfscl_request(nd, vp, p, cred, stuff);
3334                         if (error)
3335                             return (error);
3336                         dotfileid = 0;  /* Fake out the compiler. */
3337                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3338                             error = nfsm_loadattr(nd, &nfsva);
3339                             if (error != 0)
3340                                 goto nfsmout;
3341                             dctime = nfsva.na_ctime;
3342                             dotfileid = nfsva.na_fileid;
3343                         }
3344                         if (nd->nd_repstat == 0) {
3345                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3346                             len = fxdr_unsigned(int, *(tl + 4));
3347                             if (len > 0 && len <= NFSX_V4FHMAX)
3348                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3349                             else
3350                                 error = EPERM;
3351                             if (!error) {
3352                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3353                                 nfsva.na_mntonfileno = UINT64_MAX;
3354                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3355                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3356                                     NULL, NULL, NULL, p, cred);
3357                                 if (error) {
3358                                     dotdotfileid = dotfileid;
3359                                 } else if (gotmnton) {
3360                                     if (nfsva.na_mntonfileno != UINT64_MAX)
3361                                         dotdotfileid = nfsva.na_mntonfileno;
3362                                     else
3363                                         dotdotfileid = nfsva.na_fileid;
3364                                 } else if (nfsva.na_filesid[0] ==
3365                                     dnp->n_vattr.na_filesid[0] &&
3366                                     nfsva.na_filesid[1] ==
3367                                     dnp->n_vattr.na_filesid[1]) {
3368                                     dotdotfileid = nfsva.na_fileid;
3369                                 } else {
3370                                     do {
3371                                         fakefileno--;
3372                                     } while (fakefileno ==
3373                                         nfsva.na_fileid);
3374                                     dotdotfileid = fakefileno;
3375                                 }
3376                             }
3377                         } else if (nd->nd_repstat == NFSERR_NOENT) {
3378                             /*
3379                              * Lookupp returns NFSERR_NOENT when we are
3380                              * at the root, so just use the current dir.
3381                              */
3382                             nd->nd_repstat = 0;
3383                             dotdotfileid = dotfileid;
3384                         } else {
3385                             error = nd->nd_repstat;
3386                         }
3387                         mbuf_freem(nd->nd_mrep);
3388                         if (error)
3389                             return (error);
3390                         nd->nd_mrep = NULL;
3391                         dp = (struct dirent *)uio_iov_base(uiop);
3392                         dp->d_off = 0;
3393                         dp->d_type = DT_DIR;
3394                         dp->d_fileno = dotfileid;
3395                         dp->d_namlen = 1;
3396                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
3397                         dp->d_name[0] = '.';
3398                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3399                         /*
3400                          * Just make these offset cookie 0.
3401                          */
3402                         tl = (u_int32_t *)&dp->d_name[8];
3403                         *tl++ = 0;
3404                         *tl = 0;
3405                         blksiz += dp->d_reclen;
3406                         uio_uio_resid_add(uiop, -(dp->d_reclen));
3407                         uiop->uio_offset += dp->d_reclen;
3408                         uio_iov_base_add(uiop, dp->d_reclen);
3409                         uio_iov_len_add(uiop, -(dp->d_reclen));
3410                         dp = (struct dirent *)uio_iov_base(uiop);
3411                         dp->d_off = 0;
3412                         dp->d_type = DT_DIR;
3413                         dp->d_fileno = dotdotfileid;
3414                         dp->d_namlen = 2;
3415                         *((uint64_t *)dp->d_name) = 0;
3416                         dp->d_name[0] = '.';
3417                         dp->d_name[1] = '.';
3418                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3419                         /*
3420                          * Just make these offset cookie 0.
3421                          */
3422                         tl = (u_int32_t *)&dp->d_name[8];
3423                         *tl++ = 0;
3424                         *tl = 0;
3425                         blksiz += dp->d_reclen;
3426                         uio_uio_resid_add(uiop, -(dp->d_reclen));
3427                         uiop->uio_offset += dp->d_reclen;
3428                         uio_iov_base_add(uiop, dp->d_reclen);
3429                         uio_iov_len_add(uiop, -(dp->d_reclen));
3430                 }
3431                 NFSREADDIRPLUS_ATTRBIT(&attrbits);
3432                 if (gotmnton)
3433                         NFSSETBIT_ATTRBIT(&attrbits,
3434                             NFSATTRBIT_MOUNTEDONFILEID);
3435         }
3436
3437         /*
3438          * Loop around doing readdir rpc's of size nm_readdirsize.
3439          * The stopping criteria is EOF or buffer full.
3440          */
3441         while (more_dirs && bigenough) {
3442                 *attrflagp = 0;
3443                 NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp);
3444                 NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3445                 *tl++ = cookie.lval[0];
3446                 *tl++ = cookie.lval[1];
3447                 if (cookie.qval == 0) {
3448                         *tl++ = 0;
3449                         *tl++ = 0;
3450                 } else {
3451                         NFSLOCKNODE(dnp);
3452                         *tl++ = dnp->n_cookieverf.nfsuquad[0];
3453                         *tl++ = dnp->n_cookieverf.nfsuquad[1];
3454                         NFSUNLOCKNODE(dnp);
3455                 }
3456                 *tl++ = txdr_unsigned(nmp->nm_readdirsize);
3457                 *tl = txdr_unsigned(nmp->nm_readdirsize);
3458                 if (nd->nd_flag & ND_NFSV4) {
3459                         (void) nfsrv_putattrbit(nd, &attrbits);
3460                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3461                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3462                         (void) nfsrv_putattrbit(nd, &dattrbits);
3463                 }
3464                 error = nfscl_request(nd, vp, p, cred, stuff);
3465                 if (error)
3466                         return (error);
3467                 if (nd->nd_flag & ND_NFSV3)
3468                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3469                 if (nd->nd_repstat || error) {
3470                         if (!error)
3471                                 error = nd->nd_repstat;
3472                         goto nfsmout;
3473                 }
3474                 if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3475                         dctime = nap->na_ctime;
3476                 NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3477                 NFSLOCKNODE(dnp);
3478                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3479                 dnp->n_cookieverf.nfsuquad[1] = *tl++;
3480                 NFSUNLOCKNODE(dnp);
3481                 more_dirs = fxdr_unsigned(int, *tl);
3482                 if (!more_dirs)
3483                         tryformoredirs = 0;
3484         
3485                 /* loop through the dir entries, doctoring them to 4bsd form */
3486                 while (more_dirs && bigenough) {
3487                         NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3488                         if (nd->nd_flag & ND_NFSV4) {
3489                                 ncookie.lval[0] = *tl++;
3490                                 ncookie.lval[1] = *tl++;
3491                         } else {
3492                                 fileno = fxdr_hyper(tl);
3493                                 tl += 2;
3494                         }
3495                         len = fxdr_unsigned(int, *tl);
3496                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3497                                 error = EBADRPC;
3498                                 goto nfsmout;
3499                         }
3500                         tlen = roundup2(len, 8);
3501                         if (tlen == len)
3502                                 tlen += 8;  /* To ensure null termination. */
3503                         left = DIRBLKSIZ - blksiz;
3504                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3505                                 dp->d_reclen += left;
3506                                 uio_iov_base_add(uiop, left);
3507                                 uio_iov_len_add(uiop, -(left));
3508                                 uio_uio_resid_add(uiop, -(left));
3509                                 uiop->uio_offset += left;
3510                                 blksiz = 0;
3511                         }
3512                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3513                             uio_uio_resid(uiop))
3514                                 bigenough = 0;
3515                         if (bigenough) {
3516                                 dp = (struct dirent *)uio_iov_base(uiop);
3517                                 dp->d_off = 0;
3518                                 dp->d_namlen = len;
3519                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
3520                                     NFSX_HYPER;
3521                                 dp->d_type = DT_UNKNOWN;
3522                                 blksiz += dp->d_reclen;
3523                                 if (blksiz == DIRBLKSIZ)
3524                                         blksiz = 0;
3525                                 uio_uio_resid_add(uiop, -(DIRHDSIZ));
3526                                 uiop->uio_offset += DIRHDSIZ;
3527                                 uio_iov_base_add(uiop, DIRHDSIZ);
3528                                 uio_iov_len_add(uiop, -(DIRHDSIZ));
3529                                 cnp->cn_nameptr = uio_iov_base(uiop);
3530                                 cnp->cn_namelen = len;
3531                                 NFSCNHASHZERO(cnp);
3532                                 error = nfsm_mbufuio(nd, uiop, len);
3533                                 if (error)
3534                                         goto nfsmout;
3535                                 cp = uio_iov_base(uiop);
3536                                 tlen -= len;
3537                                 *cp = '\0';
3538                                 cp += tlen;     /* points to cookie storage */
3539                                 tl2 = (u_int32_t *)cp;
3540                                 if (len == 2 && cnp->cn_nameptr[0] == '.' &&
3541                                     cnp->cn_nameptr[1] == '.')
3542                                         isdotdot = 1;
3543                                 else
3544                                         isdotdot = 0;
3545                                 uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
3546                                 uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
3547                                 uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
3548                                 uiop->uio_offset += (tlen + NFSX_HYPER);
3549                         } else {
3550                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3551                                 if (error)
3552                                         goto nfsmout;
3553                         }
3554                         nfhp = NULL;
3555                         if (nd->nd_flag & ND_NFSV3) {
3556                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3557                                 ncookie.lval[0] = *tl++;
3558                                 ncookie.lval[1] = *tl++;
3559                                 attrflag = fxdr_unsigned(int, *tl);
3560                                 if (attrflag) {
3561                                   error = nfsm_loadattr(nd, &nfsva);
3562                                   if (error)
3563                                         goto nfsmout;
3564                                 }
3565                                 NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
3566                                 if (*tl) {
3567                                         error = nfsm_getfh(nd, &nfhp);
3568                                         if (error)
3569                                             goto nfsmout;
3570                                 }
3571                                 if (!attrflag && nfhp != NULL) {
3572                                         free(nfhp, M_NFSFH);
3573                                         nfhp = NULL;
3574                                 }
3575                         } else {
3576                                 rderr = 0;
3577                                 nfsva.na_mntonfileno = 0xffffffff;
3578                                 error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
3579                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3580                                     NULL, NULL, &rderr, p, cred);
3581                                 if (error)
3582                                         goto nfsmout;
3583                         }
3584
3585                         if (bigenough) {
3586                             if (nd->nd_flag & ND_NFSV4) {
3587                                 if (rderr) {
3588                                     dp->d_fileno = 0;
3589                                 } else if (gotmnton) {
3590                                     if (nfsva.na_mntonfileno != 0xffffffff)
3591                                         dp->d_fileno = nfsva.na_mntonfileno;
3592                                     else
3593                                         dp->d_fileno = nfsva.na_fileid;
3594                                 } else if (nfsva.na_filesid[0] ==
3595                                     dnp->n_vattr.na_filesid[0] &&
3596                                     nfsva.na_filesid[1] ==
3597                                     dnp->n_vattr.na_filesid[1]) {
3598                                     dp->d_fileno = nfsva.na_fileid;
3599                                 } else {
3600                                     do {
3601                                         fakefileno--;
3602                                     } while (fakefileno ==
3603                                         nfsva.na_fileid);
3604                                     dp->d_fileno = fakefileno;
3605                                 }
3606                             } else {
3607                                 dp->d_fileno = fileno;
3608                             }
3609                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3610                                 ncookie.lval[0];
3611                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3612                                 ncookie.lval[1];
3613
3614                             if (nfhp != NULL) {
3615                                 if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
3616                                     dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
3617                                     VREF(vp);
3618                                     newvp = vp;
3619                                     unlocknewvp = 0;
3620                                     free(nfhp, M_NFSFH);
3621                                     np = dnp;
3622                                 } else if (isdotdot != 0) {
3623                                     /*
3624                                      * Skip doing a nfscl_nget() call for "..".
3625                                      * There's a race between acquiring the nfs
3626                                      * node here and lookups that look for the
3627                                      * directory being read (in the parent).
3628                                      * It would try to get a lock on ".." here,
3629                                      * owning the lock on the directory being
3630                                      * read. Lookup will hold the lock on ".."
3631                                      * and try to acquire the lock on the
3632                                      * directory being read.
3633                                      * If the directory is unlocked/relocked,
3634                                      * then there is a LOR with the buflock
3635                                      * vp is relocked.
3636                                      */
3637                                     free(nfhp, M_NFSFH);
3638                                 } else {
3639                                     error = nfscl_nget(vnode_mount(vp), vp,
3640                                       nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE);
3641                                     if (!error) {
3642                                         newvp = NFSTOV(np);
3643                                         unlocknewvp = 1;
3644                                     }
3645                                 }
3646                                 nfhp = NULL;
3647                                 if (newvp != NULLVP) {
3648                                     error = nfscl_loadattrcache(&newvp,
3649                                         &nfsva, NULL, NULL, 0, 0);
3650                                     if (error) {
3651                                         if (unlocknewvp)
3652                                             vput(newvp);
3653                                         else
3654                                             vrele(newvp);
3655                                         goto nfsmout;
3656                                     }
3657                                     dp->d_type =
3658                                         vtonfs_dtype(np->n_vattr.na_type);
3659                                     ndp->ni_vp = newvp;
3660                                     NFSCNHASH(cnp, HASHINIT);
3661                                     if (cnp->cn_namelen <= NCHNAMLEN &&
3662                                         (newvp->v_type != VDIR ||
3663                                          dctime.tv_sec != 0)) {
3664                                         cache_enter_time(ndp->ni_dvp,
3665                                             ndp->ni_vp, cnp,
3666                                             &nfsva.na_ctime,
3667                                             newvp->v_type != VDIR ? NULL :
3668                                             &dctime);
3669                                     }
3670                                     if (unlocknewvp)
3671                                         vput(newvp);
3672                                     else
3673                                         vrele(newvp);
3674                                     newvp = NULLVP;
3675                                 }
3676                             }
3677                         } else if (nfhp != NULL) {
3678                             free(nfhp, M_NFSFH);
3679                         }
3680                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3681                         more_dirs = fxdr_unsigned(int, *tl);
3682                 }
3683                 /*
3684                  * If at end of rpc data, get the eof boolean
3685                  */
3686                 if (!more_dirs) {
3687                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3688                         eof = fxdr_unsigned(int, *tl);
3689                         if (tryformoredirs)
3690                                 more_dirs = !eof;
3691                         if (nd->nd_flag & ND_NFSV4) {
3692                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3693                                     stuff);
3694                                 if (error)
3695                                         goto nfsmout;
3696                         }
3697                 }
3698                 mbuf_freem(nd->nd_mrep);
3699                 nd->nd_mrep = NULL;
3700         }
3701         /*
3702          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3703          * by increasing d_reclen for the last record.
3704          */
3705         if (blksiz > 0) {
3706                 left = DIRBLKSIZ - blksiz;
3707                 dp->d_reclen += left;
3708                 uio_iov_base_add(uiop, left);
3709                 uio_iov_len_add(uiop, -(left));
3710                 uio_uio_resid_add(uiop, -(left));
3711                 uiop->uio_offset += left;
3712         }
3713
3714         /*
3715          * If returning no data, assume end of file.
3716          * If not bigenough, return not end of file, since you aren't
3717          *    returning all the data
3718          * Otherwise, return the eof flag from the server.
3719          */
3720         if (eofp != NULL) {
3721                 if (tresid == uio_uio_resid(uiop))
3722                         *eofp = 1;
3723                 else if (!bigenough)
3724                         *eofp = 0;
3725                 else
3726                         *eofp = eof;
3727         }
3728
3729         /*
3730          * Add extra empty records to any remaining DIRBLKSIZ chunks.
3731          */
3732         while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
3733                 dp = (struct dirent *)uio_iov_base(uiop);
3734                 dp->d_type = DT_UNKNOWN;
3735                 dp->d_fileno = 0;
3736                 dp->d_namlen = 0;
3737                 dp->d_name[0] = '\0';
3738                 tl = (u_int32_t *)&dp->d_name[4];
3739                 *tl++ = cookie.lval[0];
3740                 *tl = cookie.lval[1];
3741                 dp->d_reclen = DIRBLKSIZ;
3742                 uio_iov_base_add(uiop, DIRBLKSIZ);
3743                 uio_iov_len_add(uiop, -(DIRBLKSIZ));
3744                 uio_uio_resid_add(uiop, -(DIRBLKSIZ));
3745                 uiop->uio_offset += DIRBLKSIZ;
3746         }
3747
3748 nfsmout:
3749         if (nd->nd_mrep != NULL)
3750                 mbuf_freem(nd->nd_mrep);
3751         return (error);
3752 }
3753 #endif  /* !APPLE */
3754
3755 /*
3756  * Nfs commit rpc
3757  */
3758 APPLESTATIC int
3759 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
3760     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
3761 {
3762         u_int32_t *tl;
3763         struct nfsrv_descript nfsd, *nd = &nfsd;
3764         nfsattrbit_t attrbits;
3765         int error;
3766         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3767         
3768         *attrflagp = 0;
3769         NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp);
3770         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3771         txdr_hyper(offset, tl);
3772         tl += 2;
3773         *tl = txdr_unsigned(cnt);
3774         if (nd->nd_flag & ND_NFSV4) {
3775                 /*
3776                  * And do a Getattr op.
3777                  */
3778                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3779                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3780                 NFSGETATTR_ATTRBIT(&attrbits);
3781                 (void) nfsrv_putattrbit(nd, &attrbits);
3782         }
3783         error = nfscl_request(nd, vp, p, cred, stuff);
3784         if (error)
3785                 return (error);
3786         error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff);
3787         if (!error && !nd->nd_repstat) {
3788                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
3789                 NFSLOCKMNT(nmp);
3790                 if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
3791                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
3792                         nd->nd_repstat = NFSERR_STALEWRITEVERF;
3793                 }
3794                 NFSUNLOCKMNT(nmp);
3795                 if (nd->nd_flag & ND_NFSV4)
3796                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3797         }
3798 nfsmout:
3799         if (!error && nd->nd_repstat)
3800                 error = nd->nd_repstat;
3801         mbuf_freem(nd->nd_mrep);
3802         return (error);
3803 }
3804
3805 /*
3806  * NFS byte range lock rpc.
3807  * (Mostly just calls one of the three lower level RPC routines.)
      *
      * The range described by "fl" is converted to an offset/length pair
      * ("size" is the base used for SEEK_END) and "op" selects the work:
      *   F_GETLK - nfscl_lockt(), then nfsrpc_lockt() if that returned 0.
      *   F_UNLCK - (with fl->l_type == F_UNLCK) nfscl_relbytelock() and
      *             nfsrpc_locku(), repeated until no lock owner is returned.
      *   F_SETLK - nfscl_getbytelock(), then nfsrpc_lock(); "reclaim" is
      *             passed through to nfsrpc_lock().
      * Any other "op" results in EINVAL.  "id" and "flags" are handed
      * unchanged to the nfscl_*() lock state routines.
      *
      * The operation is retried after a nap for NFSERR_GRACE, NFSERR_DELAY,
      * NFSERR_STALECLIENTID, NFSERR_STALESTATEID, NFSERR_STALEDONTRECOVER
      * and NFSERR_BADSESSION.  For NFSERR_EXPIRED and NFSERR_BADSTATEID it
      * is retried only while nfscl_hasexpired() returns 0, at most 4 times.
      *
      * Returns 0 on success, EINVAL for a bad l_whence, a negative start,
      * a range whose end precedes its start or an unsupported request,
      * EIO when the expired/bad stateid retries are used up, otherwise the
      * error from the lower level routines or the reply status.
3808  */
3809 APPLESTATIC int
3810 nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
3811     int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
3812 {
3813         struct nfscllockowner *lp;
3814         struct nfsclclient *clp;
3815         struct nfsfh *nfhp;
3816         struct nfsrv_descript nfsd, *nd = &nfsd;
3817         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3818         u_int64_t off, len;
3819         off_t start, end;
3820         u_int32_t clidrev = 0;
3821         int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
3822         int callcnt, dorpc;
3823
3824         /*
3825          * Convert the flock structure into a start and end and do POSIX
3826          * bounds checking.
              * "start"/"end" are signed and only used for these checks; "off"
              * is the unsigned value handed to the lower level routines.
3827          */
3828         switch (fl->l_whence) {
3829         case SEEK_SET:
3830         case SEEK_CUR:
3831                 /*
3832                  * Caller is responsible for adding any necessary offset
3833                  * when SEEK_CUR is used.
3834                  */
3835                 start = fl->l_start;
3836                 off = fl->l_start;
3837                 break;
3838         case SEEK_END:
3839                 start = size + fl->l_start;
3840                 off = size + fl->l_start;
3841                 break;
3842         default:
3843                 return (EINVAL);
3844         }
3845         if (start < 0)
3846                 return (EINVAL);
3847         if (fl->l_len != 0) {
3848                 end = start + fl->l_len - 1;
3849                 if (end < start)
3850                         return (EINVAL);
3851         }
3852
             /* A zero l_len is passed down as a length of NFS64BITSSET. */
3853         len = fl->l_len;
3854         if (len == 0)
3855                 len = NFS64BITSSET;
3856         retrycnt = 0;
             /*
              * Each pass performs the operation once.  nd_repstat is cleared
              * first because it is read after the if/else chain below even on
              * the paths that never issue an RPC.
              */
3857         do {
3858             nd->nd_repstat = 0;
3859             if (op == F_GETLK) {
3860                 error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
3861                 if (error)
3862                         return (error);
3863                 error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
3864                 if (!error) {
3865                         clidrev = clp->nfsc_clientidrev;
3866                         error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
3867                             p, id, flags);
3868                 } else if (error == -1) {
                             /*
                              * -1 from nfscl_lockt() is not treated as a failure:
                              * the LockT RPC is skipped and success is reported.
                              * NOTE(review): presumably the request was answered
                              * from local lock state -- confirm in nfscl_lockt().
                              */
3869                         error = 0;
3870                 }
3871                 nfscl_clientrelease(clp);
3872             } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
3873                 /*
3874                  * We must loop around for all lockowner cases.
3875                  */
3876                 callcnt = 0;
3877                 error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
3878                 if (error)
3879                         return (error);
3880                 do {
3881                     error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
3882                         clp, id, flags, &lp, &dorpc);
3883                     /*
3884                      * If it returns a NULL lp, we're done.
                          * This returns directly, bypassing the retry handling
                          * at the bottom of the outer loop.  On the first call
                          * only the client reference is dropped; on later calls
                          * nfscl_releasealllocks() is used instead.
3885                      */
3886                     if (lp == NULL) {
3887                         if (callcnt == 0)
3888                             nfscl_clientrelease(clp);
3889                         else
3890                             nfscl_releasealllocks(clp, vp, p, id, flags);
3891                         return (error);
3892                     }
                         /* Remember the clientid revision for nfscl_hasexpired(). */
3893                     if (nmp->nm_clp != NULL)
3894                         clidrev = nmp->nm_clp->nfsc_clientidrev;
3895                     else
3896                         clidrev = 0;
3897                     /*
3898                      * If the server doesn't support Posix lock semantics,
3899                      * only allow locks on the entire file, since it won't
3900                      * handle overlapping byte ranges.
3901                      * There might still be a problem when a lock
3902                      * upgrade/downgrade (read<->write) occurs, since the
3903                      * server "might" expect an unlock first?
3904                      */
3905                     if (dorpc && (lp->nfsl_open->nfso_posixlock ||
3906                         (off == 0 && len == NFS64BITSSET))) {
3907                         /*
3908                          * Since the lock records will go away, we must
3909                          * wait for grace and delay here.
3910                          */
3911                         do {
3912                             error = nfsrpc_locku(nd, nmp, lp, off, len,
3913                                 NFSV4LOCKT_READ, cred, p, 0);
3914                             if ((nd->nd_repstat == NFSERR_GRACE ||
3915                                  nd->nd_repstat == NFSERR_DELAY) &&
3916                                 error == 0)
3917                                 (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
3918                                     "nfs_advlock");
3919                         } while ((nd->nd_repstat == NFSERR_GRACE ||
3920                             nd->nd_repstat == NFSERR_DELAY) && error == 0);
3921                     }
3922                     callcnt++;
3923                 } while (error == 0 && nd->nd_repstat == 0);
3924                 nfscl_releasealllocks(clp, vp, p, id, flags);
3925             } else if (op == F_SETLK) {
3926                 error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
3927                     NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
                     /*
                      * Finished if that failed or if it set donelocally, in
                      * which case no Lock RPC is done here.
                      */
3928                 if (error || donelocally) {
3929                         return (error);
3930                 }
3931                 if (nmp->nm_clp != NULL)
3932                         clidrev = nmp->nm_clp->nfsc_clientidrev;
3933                 else
3934                         clidrev = 0;
3935                 nfhp = VTONFS(vp)->n_fhp;
                     /*
                      * Same restriction as for unlock: without Posix lock
                      * semantics only a lock covering the entire file is sent.
                      */
3936                 if (!lp->nfsl_open->nfso_posixlock &&
3937                     (off != 0 || len != NFS64BITSSET)) {
3938                         error = EINVAL;
3939                 } else {
3940                         error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
3941                             nfhp->nfh_len, lp, newone, reclaim, off,
3942                             len, fl->l_type, cred, p, 0);
3943                 }
                     /* Fold in the reply status before releasing the lock owner. */
3944                 if (!error)
3945                         error = nd->nd_repstat;
3946                 nfscl_lockrelease(lp, error, newone);
3947             } else {
3948                 error = EINVAL;
3949             }
3950             if (!error)
3951                 error = nd->nd_repstat;
                 /*
                  * Decide whether to go around again: nap for the transient
                  * errors, or check via nfscl_hasexpired() for an expired or
                  * bad stateid (only when a clientid revision was recorded).
                  */
3952             if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
3953                 error == NFSERR_STALEDONTRECOVER ||
3954                 error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
3955                 error == NFSERR_BADSESSION) {
3956                 (void) nfs_catnap(PZERO, error, "nfs_advlock");
3957             } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
3958                 && clidrev != 0) {
3959                 expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
3960                 retrycnt++;
3961             }
3962         } while (error == NFSERR_GRACE ||
3963             error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
3964             error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
3965             error == NFSERR_BADSESSION ||
3966             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
3967              expireret == 0 && clidrev != 0 && retrycnt < 4));
             /* Give up with EIO once the expired/bad stateid retries run out. */
3968         if (error && retrycnt >= 4)
3969                 error = EIO;
3970         return (error);
3971 }
3972
3973 /*
3974  * The lower level routine for the LockT case.
3975  */
3976 APPLESTATIC int
3977 nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
3978     struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
3979     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
3980 {
3981         u_int32_t *tl;
3982         int error, type, size;
3983         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
3984         struct nfsnode *np;
3985         struct nfsmount *nmp;
3986         struct nfsclsession *tsep;
3987
3988         nmp = VFSTONFS(vp->v_mount);
3989         NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp);
3990         NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
3991         if (fl->l_type == F_RDLCK)
3992                 *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
3993         else
3994                 *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
3995         txdr_hyper(off, tl);
3996         tl += 2;
3997         txdr_hyper(len, tl);
3998         tl += 2;
3999         tsep = nfsmnt_mdssession(nmp);
4000         *tl++ = tsep->nfsess_clientid.lval[0];
4001         *tl = tsep->nfsess_clientid.lval[1];
4002         nfscl_filllockowner(id, own, flags);
4003         np = VTONFS(vp);
4004         NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
4005             np->n_fhp->nfh_len);
4006         (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
4007         error = nfscl_request(nd, vp, p, cred, NULL);
4008         if (error)
4009                 return (error);
4010         if (nd->nd_repstat == 0) {
4011                 fl->l_type = F_UNLCK;
4012         } else if (nd->nd_repstat == NFSERR_DENIED) {
4013                 nd->nd_repstat = 0;
4014                 fl->l_whence = SEEK_SET;
4015                 NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4016                 fl->l_start = fxdr_hyper(tl);
4017                 tl += 2;
4018                 len = fxdr_hyper(tl);
4019                 tl += 2;
4020                 if (len == NFS64BITSSET)
4021                         fl->l_len = 0;
4022                 else
4023                         fl->l_len = len;
4024                 type = fxdr_unsigned(int, *tl++);
4025                 if (type == NFSV4LOCKT_WRITE)
4026                         fl->l_type = F_WRLCK;
4027                 else
4028                         fl->l_type = F_RDLCK;
4029                 /*
4030                  * XXX For now, I have no idea what to do with the
4031                  * conflicting lock_owner, so I'll just set the pid == 0
4032                  * and skip over the lock_owner.
4033                  */
4034                 fl->l_pid = (pid_t)0;
4035                 tl += 2;
4036                 size = fxdr_unsigned(int, *tl);
4037                 if (size < 0 || size > NFSV4_OPAQUELIMIT)
4038                         error = EBADRPC;
4039                 if (!error)
4040                         error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4041         } else if (nd->nd_repstat == NFSERR_STALECLIENTID)
4042                 nfscl_initiate_recovery(clp);
4043 nfsmout:
4044         mbuf_freem(nd->nd_mrep);
4045         return (error);
4046 }
4047
4048 /*
4049  * Lower level function that performs the LockU RPC.
4050  */
4051 static int
4052 nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
4053     struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
4054     u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
4055 {
4056         u_int32_t *tl;
4057         int error;
4058
4059         nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
4060             lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0);
4061         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
4062         *tl++ = txdr_unsigned(type);
4063         *tl = txdr_unsigned(lp->nfsl_seqid);
4064         if (nfstest_outofseq &&
4065             (arc4random() % nfstest_outofseq) == 0)
4066                 *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4067         tl++;
4068         if (NFSHASNFSV4N(nmp))
4069                 *tl++ = 0;
4070         else
4071                 *tl++ = lp->nfsl_stateid.seqid;
4072         *tl++ = lp->nfsl_stateid.other[0];
4073         *tl++ = lp->nfsl_stateid.other[1];
4074         *tl++ = lp->nfsl_stateid.other[2];
4075         txdr_hyper(off, tl);
4076         tl += 2;
4077         txdr_hyper(len, tl);
4078         if (syscred)
4079                 nd->nd_flag |= ND_USEGSSNAME;
4080         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4081             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4082         NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4083         if (error)
4084                 return (error);
4085         if (nd->nd_repstat == 0) {
4086                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4087                 lp->nfsl_stateid.seqid = *tl++;
4088                 lp->nfsl_stateid.other[0] = *tl++;
4089                 lp->nfsl_stateid.other[1] = *tl++;
4090                 lp->nfsl_stateid.other[2] = *tl;
4091         } else if (nd->nd_repstat == NFSERR_STALESTATEID)
4092                 nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4093 nfsmout:
4094         mbuf_freem(nd->nd_mrep);
4095         return (error);
4096 }
4097
4098 /*
4099  * The actual Lock RPC.
4100  */
4101 APPLESTATIC int
4102 nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
4103     u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
4104     int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
4105     NFSPROC_T *p, int syscred)
4106 {
4107         u_int32_t *tl;
4108         int error, size;
4109         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4110         struct nfsclsession *tsep;
4111
4112         nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
4113         NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4114         if (type == F_RDLCK)
4115                 *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4116         else
4117                 *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4118         *tl++ = txdr_unsigned(reclaim);
4119         txdr_hyper(off, tl);
4120         tl += 2;
4121         txdr_hyper(len, tl);
4122         tl += 2;
4123         if (newone) {
4124             *tl = newnfs_true;
4125             NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
4126                 2 * NFSX_UNSIGNED + NFSX_HYPER);
4127             *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
4128             if (NFSHASNFSV4N(nmp))
4129                 *tl++ = 0;
4130             else
4131                 *tl++ = lp->nfsl_open->nfso_stateid.seqid;
4132             *tl++ = lp->nfsl_open->nfso_stateid.other[0];
4133             *tl++ = lp->nfsl_open->nfso_stateid.other[1];
4134             *tl++ = lp->nfsl_open->nfso_stateid.other[2];
4135             *tl++ = txdr_unsigned(lp->nfsl_seqid);
4136             tsep = nfsmnt_mdssession(nmp);
4137             *tl++ = tsep->nfsess_clientid.lval[0];
4138             *tl = tsep->nfsess_clientid.lval[1];
4139             NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4140             NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4141             (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4142         } else {
4143             *tl = newnfs_false;
4144             NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
4145             if (NFSHASNFSV4N(nmp))
4146                 *tl++ = 0;
4147             else
4148                 *tl++ = lp->nfsl_stateid.seqid;
4149             *tl++ = lp->nfsl_stateid.other[0];
4150             *tl++ = lp->nfsl_stateid.other[1];
4151             *tl++ = lp->nfsl_stateid.other[2];
4152             *tl = txdr_unsigned(lp->nfsl_seqid);
4153             if (nfstest_outofseq &&
4154                 (arc4random() % nfstest_outofseq) == 0)
4155                     *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4156         }
4157         if (syscred)
4158                 nd->nd_flag |= ND_USEGSSNAME;
4159         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4160             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4161         if (error)
4162                 return (error);
4163         if (newone)
4164             NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4165         NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4166         if (nd->nd_repstat == 0) {
4167                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4168                 lp->nfsl_stateid.seqid = *tl++;
4169                 lp->nfsl_stateid.other[0] = *tl++;
4170                 lp->nfsl_stateid.other[1] = *tl++;
4171                 lp->nfsl_stateid.other[2] = *tl;
4172         } else if (nd->nd_repstat == NFSERR_DENIED) {
4173                 NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4174                 size = fxdr_unsigned(int, *(tl + 7));
4175                 if (size < 0 || size > NFSV4_OPAQUELIMIT)
4176                         error = EBADRPC;
4177                 if (!error)
4178                         error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4179         } else if (nd->nd_repstat == NFSERR_STALESTATEID)
4180                 nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4181 nfsmout:
4182         mbuf_freem(nd->nd_mrep);
4183         return (error);
4184 }
4185
4186 /*
4187  * nfs statfs rpc
4188  * (always called with the vp for the mount point)
4189  */
4190 APPLESTATIC int
4191 nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4192     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4193     void *stuff)
4194 {
4195         u_int32_t *tl = NULL;
4196         struct nfsrv_descript nfsd, *nd = &nfsd;
4197         struct nfsmount *nmp;
4198         nfsattrbit_t attrbits;
4199         int error;
4200
4201         *attrflagp = 0;
4202         nmp = VFSTONFS(vnode_mount(vp));
4203         if (NFSHASNFSV4(nmp)) {
4204                 /*
4205                  * For V4, you actually do a getattr.
4206                  */
4207                 NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4208                 NFSSTATFS_GETATTRBIT(&attrbits);
4209                 (void) nfsrv_putattrbit(nd, &attrbits);
4210                 nd->nd_flag |= ND_USEGSSNAME;
4211                 error = nfscl_request(nd, vp, p, cred, stuff);
4212                 if (error)
4213                         return (error);
4214                 if (nd->nd_repstat == 0) {
4215                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4216                             NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p,
4217                             cred);
4218                         if (!error) {
4219                                 nmp->nm_fsid[0] = nap->na_filesid[0];
4220                                 nmp->nm_fsid[1] = nap->na_filesid[1];
4221                                 NFSSETHASSETFSID(nmp);
4222                                 *attrflagp = 1;
4223                         }
4224                 } else {
4225                         error = nd->nd_repstat;
4226                 }
4227                 if (error)
4228                         goto nfsmout;
4229         } else {
4230                 NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp);
4231                 error = nfscl_request(nd, vp, p, cred, stuff);
4232                 if (error)
4233                         return (error);
4234                 if (nd->nd_flag & ND_NFSV3) {
4235                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4236                         if (error)
4237                                 goto nfsmout;
4238                 }
4239                 if (nd->nd_repstat) {
4240                         error = nd->nd_repstat;
4241                         goto nfsmout;
4242                 }
4243                 NFSM_DISSECT(tl, u_int32_t *,
4244                     NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4245         }
4246         if (NFSHASNFSV3(nmp)) {
4247                 sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4248                 sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4249                 sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4250                 sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4251                 sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4252                 sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4253                 sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4254         } else if (NFSHASNFSV4(nmp) == 0) {
4255                 sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4256                 sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4257                 sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4258                 sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4259                 sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4260         }
4261 nfsmout:
4262         mbuf_freem(nd->nd_mrep);
4263         return (error);
4264 }
4265
4266 /*
4267  * nfs pathconf rpc
4268  */
4269 APPLESTATIC int
4270 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4271     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4272     void *stuff)
4273 {
4274         struct nfsrv_descript nfsd, *nd = &nfsd;
4275         struct nfsmount *nmp;
4276         u_int32_t *tl;
4277         nfsattrbit_t attrbits;
4278         int error;
4279
4280         *attrflagp = 0;
4281         nmp = VFSTONFS(vnode_mount(vp));
4282         if (NFSHASNFSV4(nmp)) {
4283                 /*
4284                  * For V4, you actually do a getattr.
4285                  */
4286                 NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4287                 NFSPATHCONF_GETATTRBIT(&attrbits);
4288                 (void) nfsrv_putattrbit(nd, &attrbits);
4289                 nd->nd_flag |= ND_USEGSSNAME;
4290                 error = nfscl_request(nd, vp, p, cred, stuff);
4291                 if (error)
4292                         return (error);
4293                 if (nd->nd_repstat == 0) {
4294                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4295                             pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4296                             cred);
4297                         if (!error)
4298                                 *attrflagp = 1;
4299                 } else {
4300                         error = nd->nd_repstat;
4301                 }
4302         } else {
4303                 NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp);
4304                 error = nfscl_request(nd, vp, p, cred, stuff);
4305                 if (error)
4306                         return (error);
4307                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4308                 if (nd->nd_repstat && !error)
4309                         error = nd->nd_repstat;
4310                 if (!error) {
4311                         NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4312                         pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4313                         pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4314                         pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4315                         pc->pc_chownrestricted =
4316                             fxdr_unsigned(u_int32_t, *tl++);
4317                         pc->pc_caseinsensitive =
4318                             fxdr_unsigned(u_int32_t, *tl++);
4319                         pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4320                 }
4321         }
4322 nfsmout:
4323         mbuf_freem(nd->nd_mrep);
4324         return (error);
4325 }
4326
4327 /*
4328  * nfs version 3 fsinfo rpc call
4329  */
4330 APPLESTATIC int
4331 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4332     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
4333 {
4334         u_int32_t *tl;
4335         struct nfsrv_descript nfsd, *nd = &nfsd;
4336         int error;
4337
4338         *attrflagp = 0;
4339         NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp);
4340         error = nfscl_request(nd, vp, p, cred, stuff);
4341         if (error)
4342                 return (error);
4343         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4344         if (nd->nd_repstat && !error)
4345                 error = nd->nd_repstat;
4346         if (!error) {
4347                 NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4348                 fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4349                 fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4350                 fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4351                 fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4352                 fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4353                 fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4354                 fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4355                 fsp->fs_maxfilesize = fxdr_hyper(tl);
4356                 tl += 2;
4357                 fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4358                 tl += 2;
4359                 fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4360         }
4361 nfsmout:
4362         mbuf_freem(nd->nd_mrep);
4363         return (error);
4364 }
4365
4366 /*
4367  * This function performs the Renew RPC.
4368  */
4369 APPLESTATIC int
4370 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4371     NFSPROC_T *p)
4372 {
4373         u_int32_t *tl;
4374         struct nfsrv_descript nfsd;
4375         struct nfsrv_descript *nd = &nfsd;
4376         struct nfsmount *nmp;
4377         int error;
4378         struct nfssockreq *nrp;
4379         struct nfsclsession *tsep;
4380
4381         nmp = clp->nfsc_nmp;
4382         if (nmp == NULL)
4383                 return (0);
4384         if (dsp == NULL)
4385                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
4386                     0);
4387         else
4388                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4389                     &dsp->nfsclds_sess, 0, 0);
4390         if (!NFSHASNFSV4N(nmp)) {
4391                 /* NFSv4.1 just uses a Sequence Op and not a Renew. */
4392                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4393                 tsep = nfsmnt_mdssession(nmp);
4394                 *tl++ = tsep->nfsess_clientid.lval[0];
4395                 *tl = tsep->nfsess_clientid.lval[1];
4396         }
4397         nrp = NULL;
4398         if (dsp != NULL)
4399                 nrp = dsp->nfsclds_sockp;
4400         if (nrp == NULL)
4401                 /* If NULL, use the MDS socket. */
4402                 nrp = &nmp->nm_sockreq;
4403         nd->nd_flag |= ND_USEGSSNAME;
4404         if (dsp == NULL)
4405                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4406                     NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4407         else
4408                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4409                     NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4410         if (error)
4411                 return (error);
4412         error = nd->nd_repstat;
4413         mbuf_freem(nd->nd_mrep);
4414         return (error);
4415 }
4416
4417 /*
4418  * This function performs the Releaselockowner RPC.
4419  */
4420 APPLESTATIC int
4421 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4422     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4423 {
4424         struct nfsrv_descript nfsd, *nd = &nfsd;
4425         u_int32_t *tl;
4426         int error;
4427         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4428         struct nfsclsession *tsep;
4429
4430         if (NFSHASNFSV4N(nmp)) {
4431                 /* For NFSv4.1, do a FreeStateID. */
4432                 nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4433                     NULL, 0, 0);
4434                 nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4435         } else {
4436                 nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4437                     NULL, 0, 0);
4438                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4439                 tsep = nfsmnt_mdssession(nmp);
4440                 *tl++ = tsep->nfsess_clientid.lval[0];
4441                 *tl = tsep->nfsess_clientid.lval[1];
4442                 NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4443                 NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4444                 (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4445         }
4446         nd->nd_flag |= ND_USEGSSNAME;
4447         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4448             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4449         if (error)
4450                 return (error);
4451         error = nd->nd_repstat;
4452         mbuf_freem(nd->nd_mrep);
4453         return (error);
4454 }
4455
4456 /*
4457  * This function performs the Compound to get the mount pt FH.
4458  */
4459 APPLESTATIC int
4460 nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
4461     NFSPROC_T *p)
4462 {
4463         u_int32_t *tl;
4464         struct nfsrv_descript nfsd;
4465         struct nfsrv_descript *nd = &nfsd;
4466         u_char *cp, *cp2;
4467         int error, cnt, len, setnil;
4468         u_int32_t *opcntp;
4469
4470         nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
4471             0);
4472         cp = dirpath;
4473         cnt = 0;
4474         do {
4475                 setnil = 0;
4476                 while (*cp == '/')
4477                         cp++;
4478                 cp2 = cp;
4479                 while (*cp2 != '\0' && *cp2 != '/')
4480                         cp2++;
4481                 if (*cp2 == '/') {
4482                         setnil = 1;
4483                         *cp2 = '\0';
4484                 }
4485                 if (cp2 != cp) {
4486                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4487                         *tl = txdr_unsigned(NFSV4OP_LOOKUP);
4488                         nfsm_strtom(nd, cp, strlen(cp));
4489                         cnt++;
4490                 }
4491                 if (setnil)
4492                         *cp2++ = '/';
4493                 cp = cp2;
4494         } while (*cp != '\0');
4495         if (NFSHASNFSV4N(nmp))
4496                 /* Has a Sequence Op done by nfscl_reqstart(). */
4497                 *opcntp = txdr_unsigned(3 + cnt);
4498         else
4499                 *opcntp = txdr_unsigned(2 + cnt);
4500         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4501         *tl = txdr_unsigned(NFSV4OP_GETFH);
4502         nd->nd_flag |= ND_USEGSSNAME;
4503         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4504                 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4505         if (error)
4506                 return (error);
4507         if (nd->nd_repstat == 0) {
4508                 NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
4509                 tl += (2 + 2 * cnt);
4510                 if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
4511                         len > NFSX_FHMAX) {
4512                         nd->nd_repstat = NFSERR_BADXDR;
4513                 } else {
4514                         nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len);
4515                         if (nd->nd_repstat == 0)
4516                                 nmp->nm_fhsize = len;
4517                 }
4518         }
4519         error = nd->nd_repstat;
4520 nfsmout:
4521         mbuf_freem(nd->nd_mrep);
4522         return (error);
4523 }
4524
4525 /*
4526  * This function performs the Delegreturn RPC.
4527  */
4528 APPLESTATIC int
4529 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
4530     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
4531 {
4532         u_int32_t *tl;
4533         struct nfsrv_descript nfsd;
4534         struct nfsrv_descript *nd = &nfsd;
4535         int error;
4536
4537         nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
4538             dp->nfsdl_fhlen, NULL, NULL, 0, 0);
4539         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
4540         if (NFSHASNFSV4N(nmp))
4541                 *tl++ = 0;
4542         else
4543                 *tl++ = dp->nfsdl_stateid.seqid;
4544         *tl++ = dp->nfsdl_stateid.other[0];
4545         *tl++ = dp->nfsdl_stateid.other[1];
4546         *tl = dp->nfsdl_stateid.other[2];
4547         if (syscred)
4548                 nd->nd_flag |= ND_USEGSSNAME;
4549         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4550             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4551         if (error)
4552                 return (error);
4553         error = nd->nd_repstat;
4554         mbuf_freem(nd->nd_mrep);
4555         return (error);
4556 }
4557
4558 /*
4559  * nfs getacl call.
4560  */
4561 APPLESTATIC int
4562 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4563     struct acl *aclp, void *stuff)
4564 {
4565         struct nfsrv_descript nfsd, *nd = &nfsd;
4566         int error;
4567         nfsattrbit_t attrbits;
4568         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4569         
4570         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4571                 return (EOPNOTSUPP);
4572         NFSCL_REQSTART(nd, NFSPROC_GETACL, vp);
4573         NFSZERO_ATTRBIT(&attrbits);
4574         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4575         (void) nfsrv_putattrbit(nd, &attrbits);
4576         error = nfscl_request(nd, vp, p, cred, stuff);
4577         if (error)
4578                 return (error);
4579         if (!nd->nd_repstat)
4580                 error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
4581                     NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
4582         else
4583                 error = nd->nd_repstat;
4584         mbuf_freem(nd->nd_mrep);
4585         return (error);
4586 }
4587
4588 /*
4589  * nfs setacl call.
4590  */
4591 APPLESTATIC int
4592 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4593     struct acl *aclp, void *stuff)
4594 {
4595         int error;
4596         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4597         
4598         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4599                 return (EOPNOTSUPP);
4600         error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff);
4601         return (error);
4602 }
4603
4604 /*
4605  * nfs setacl call.
4606  */
4607 static int
4608 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4609     struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff)
4610 {
4611         struct nfsrv_descript nfsd, *nd = &nfsd;
4612         int error;
4613         nfsattrbit_t attrbits;
4614         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4615         
4616         if (!NFSHASNFSV4(nmp))
4617                 return (EOPNOTSUPP);
4618         NFSCL_REQSTART(nd, NFSPROC_SETACL, vp);
4619         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
4620         NFSZERO_ATTRBIT(&attrbits);
4621         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4622         (void) nfsv4_fillattr(nd, vnode_mount(vp), vp, aclp, NULL, NULL, 0,
4623             &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0);
4624         error = nfscl_request(nd, vp, p, cred, stuff);
4625         if (error)
4626                 return (error);
4627         /* Don't care about the pre/postop attributes */
4628         mbuf_freem(nd->nd_mrep);
4629         return (nd->nd_repstat);
4630 }
4631
4632 /*
4633  * Do the NFSv4.1 Exchange ID.
4634  */
4635 int
4636 nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
4637     struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp,
4638     struct ucred *cred, NFSPROC_T *p)
4639 {
4640         uint32_t *tl, v41flags;
4641         struct nfsrv_descript nfsd;
4642         struct nfsrv_descript *nd = &nfsd;
4643         struct nfsclds *dsp;
4644         struct timespec verstime;
4645         int error, len;
4646
4647         *dspp = NULL;
4648         nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL, 0, 0);
4649         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4650         *tl++ = txdr_unsigned(nfsboottime.tv_sec);      /* Client owner */
4651         *tl = txdr_unsigned(clp->nfsc_rev);
4652         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
4653
4654         NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
4655         *tl++ = txdr_unsigned(exchflags);
4656         *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
4657
4658         /* Set the implementation id4 */
4659         *tl = txdr_unsigned(1);
4660         (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
4661         (void) nfsm_strtom(nd, version, strlen(version));
4662         NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
4663         verstime.tv_sec = 1293840000;           /* Jan 1, 2011 */
4664         verstime.tv_nsec = 0;
4665         txdr_nfsv4time(&verstime, tl);
4666         nd->nd_flag |= ND_USEGSSNAME;
4667         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4668             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4669         NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
4670             (int)nd->nd_repstat);
4671         if (error != 0)
4672                 return (error);
4673         if (nd->nd_repstat == 0) {
4674                 NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
4675                 len = fxdr_unsigned(int, *(tl + 7));
4676                 if (len < 0 || len > NFSV4_OPAQUELIMIT) {
4677                         error = NFSERR_BADXDR;
4678                         goto nfsmout;
4679                 }
4680                 dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
4681                     M_WAITOK | M_ZERO);
4682                 dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
4683                 dsp->nfsclds_servownlen = len;
4684                 dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
4685                 dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
4686                 dsp->nfsclds_sess.nfsess_sequenceid =
4687                     fxdr_unsigned(uint32_t, *tl++);
4688                 v41flags = fxdr_unsigned(uint32_t, *tl);
4689                 if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
4690                     NFSHASPNFSOPT(nmp)) {
4691                         NFSCL_DEBUG(1, "set PNFS\n");
4692                         NFSLOCKMNT(nmp);
4693                         nmp->nm_state |= NFSSTA_PNFS;
4694                         NFSUNLOCKMNT(nmp);
4695                         dsp->nfsclds_flags |= NFSCLDS_MDS;
4696                 }
4697                 if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
4698                         dsp->nfsclds_flags |= NFSCLDS_DS;
4699                 if (len > 0)
4700                         nd->nd_repstat = nfsrv_mtostr(nd,
4701                             dsp->nfsclds_serverown, len);
4702                 if (nd->nd_repstat == 0) {
4703                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
4704                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
4705                             NULL, MTX_DEF);
4706                         nfscl_initsessionslots(&dsp->nfsclds_sess);
4707                         *dspp = dsp;
4708                 } else
4709                         free(dsp, M_NFSCLDS);
4710         }
4711         error = nd->nd_repstat;
4712 nfsmout:
4713         mbuf_freem(nd->nd_mrep);
4714         return (error);
4715 }
4716
4717 /*
4718  * Do the NFSv4.1 Create Session.
4719  */
4720 int
4721 nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
4722     struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
4723     NFSPROC_T *p)
4724 {
4725         uint32_t crflags, maxval, *tl;
4726         struct nfsrv_descript nfsd;
4727         struct nfsrv_descript *nd = &nfsd;
4728         int error, irdcnt;
4729
4730         /* Make sure nm_rsize, nm_wsize is set. */
4731         if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
4732                 nmp->nm_rsize = NFS_MAXBSIZE;
4733         if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
4734                 nmp->nm_wsize = NFS_MAXBSIZE;
4735         nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL, 0,
4736             0);
4737         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4738         *tl++ = sep->nfsess_clientid.lval[0];
4739         *tl++ = sep->nfsess_clientid.lval[1];
4740         *tl++ = txdr_unsigned(sequenceid);
4741         crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
4742         if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
4743                 crflags |= NFSV4CRSESS_CONNBACKCHAN;
4744         *tl = txdr_unsigned(crflags);
4745
4746         /* Fill in fore channel attributes. */
4747         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4748         *tl++ = 0;                              /* Header pad size */
4749         *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */
4750         *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */
4751         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
4752         *tl++ = txdr_unsigned(20);              /* Max operations */
4753         *tl++ = txdr_unsigned(64);              /* Max slots */
4754         *tl = 0;                                /* No rdma ird */
4755
4756         /* Fill in back channel attributes. */
4757         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4758         *tl++ = 0;                              /* Header pad size */
4759         *tl++ = txdr_unsigned(10000);           /* Max request size */
4760         *tl++ = txdr_unsigned(10000);           /* Max response size */
4761         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
4762         *tl++ = txdr_unsigned(4);               /* Max operations */
4763         *tl++ = txdr_unsigned(NFSV4_CBSLOTS);   /* Max slots */
4764         *tl = 0;                                /* No rdma ird */
4765
4766         NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
4767         *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */
4768
4769         /* Allow AUTH_SYS callbacks as uid, gid == 0. */
4770         *tl++ = txdr_unsigned(1);               /* Auth_sys only */
4771         *tl++ = txdr_unsigned(AUTH_SYS);        /* AUTH_SYS type */
4772         *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
4773         *tl++ = 0;                              /* Null machine name */
4774         *tl++ = 0;                              /* Uid == 0 */
4775         *tl++ = 0;                              /* Gid == 0 */
4776         *tl = 0;                                /* No additional gids */
4777         nd->nd_flag |= ND_USEGSSNAME;
4778         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
4779             NFS_VER4, NULL, 1, NULL, NULL);
4780         if (error != 0)
4781                 return (error);
4782         if (nd->nd_repstat == 0) {
4783                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
4784                     2 * NFSX_UNSIGNED);
4785                 bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
4786                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
4787                 sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
4788                 crflags = fxdr_unsigned(uint32_t, *tl);
4789                 if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
4790                         NFSLOCKMNT(nmp);
4791                         nmp->nm_state |= NFSSTA_SESSPERSIST;
4792                         NFSUNLOCKMNT(nmp);
4793                 }
4794
4795                 /* Get the fore channel slot count. */
4796                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4797                 tl++;                   /* Skip the header pad size. */
4798
4799                 /* Make sure nm_wsize is small enough. */
4800                 maxval = fxdr_unsigned(uint32_t, *tl++);
4801                 while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
4802                         if (nmp->nm_wsize > 8096)
4803                                 nmp->nm_wsize /= 2;
4804                         else
4805                                 break;
4806                 }
4807
4808                 /* Make sure nm_rsize is small enough. */
4809                 maxval = fxdr_unsigned(uint32_t, *tl++);
4810                 while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
4811                         if (nmp->nm_rsize > 8096)
4812                                 nmp->nm_rsize /= 2;
4813                         else
4814                                 break;
4815                 }
4816
4817                 sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
4818                 tl++;
4819                 sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
4820                 NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
4821                 irdcnt = fxdr_unsigned(int, *tl);
4822                 if (irdcnt > 0)
4823                         NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
4824
4825                 /* and the back channel slot count. */
4826                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4827                 tl += 5;
4828                 sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
4829                 NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
4830         }
4831         error = nd->nd_repstat;
4832 nfsmout:
4833         mbuf_freem(nd->nd_mrep);
4834         return (error);
4835 }
4836
4837 /*
4838  * Do the NFSv4.1 Destroy Session.
4839  */
4840 int
4841 nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp,
4842     struct ucred *cred, NFSPROC_T *p)
4843 {
4844         uint32_t *tl;
4845         struct nfsrv_descript nfsd;
4846         struct nfsrv_descript *nd = &nfsd;
4847         int error;
4848         struct nfsclsession *tsep;
4849
4850         nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL, 0,
4851             0);
4852         NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
4853         tsep = nfsmnt_mdssession(nmp);
4854         bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID);
4855         nd->nd_flag |= ND_USEGSSNAME;
4856         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4857             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4858         if (error != 0)
4859                 return (error);
4860         error = nd->nd_repstat;
4861         mbuf_freem(nd->nd_mrep);
4862         return (error);
4863 }
4864
4865 /*
4866  * Do the NFSv4.1 Destroy Client.
4867  */
4868 int
4869 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
4870     struct ucred *cred, NFSPROC_T *p)
4871 {
4872         uint32_t *tl;
4873         struct nfsrv_descript nfsd;
4874         struct nfsrv_descript *nd = &nfsd;
4875         int error;
4876         struct nfsclsession *tsep;
4877
4878         nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0,
4879             0);
4880         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4881         tsep = nfsmnt_mdssession(nmp);
4882         *tl++ = tsep->nfsess_clientid.lval[0];
4883         *tl = tsep->nfsess_clientid.lval[1];
4884         nd->nd_flag |= ND_USEGSSNAME;
4885         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4886             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4887         if (error != 0)
4888                 return (error);
4889         error = nd->nd_repstat;
4890         mbuf_freem(nd->nd_mrep);
4891         return (error);
4892 }
4893
4894 /*
4895  * Do the NFSv4.1 LayoutGet.
4896  */
4897 static int
4898 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
4899     uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
4900     int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
4901     struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p,
4902     void *stuff)
4903 {
4904         struct nfsrv_descript nfsd, *nd = &nfsd;
4905         int error;
4906
4907         nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
4908             0);
4909         nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
4910             layouttype, layoutlen, 0);
4911         nd->nd_flag |= ND_USEGSSNAME;
4912         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4913             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4914         NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
4915         if (error != 0)
4916                 return (error);
4917         if (nd->nd_repstat == 0)
4918                 error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp);
4919         if (error == 0 && nd->nd_repstat != 0)
4920                 error = nd->nd_repstat;
4921         mbuf_freem(nd->nd_mrep);
4922         return (error);
4923 }
4924
4925 /*
4926  * Do the NFSv4.1 Get Device Info.
4927  */
4928 int
4929 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
4930     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
4931     NFSPROC_T *p)
4932 {
4933         uint32_t cnt, *tl, vers, minorvers;
4934         struct nfsrv_descript nfsd;
4935         struct nfsrv_descript *nd = &nfsd;
4936         struct sockaddr_in sin, ssin;
4937         struct sockaddr_in6 sin6, ssin6;
4938         struct nfsclds *dsp = NULL, **dspp, **gotdspp;
4939         struct nfscldevinfo *ndi;
4940         int addrcnt = 0, bitcnt, error, gotvers, i, isudp, j, stripecnt;
4941         uint8_t stripeindex;
4942         sa_family_t af, safilled;
4943
4944         *ndip = NULL;
4945         ndi = NULL;
4946         gotdspp = NULL;
4947         nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0,
4948             0);
4949         NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
4950         NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
4951         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
4952         *tl++ = txdr_unsigned(layouttype);
4953         *tl++ = txdr_unsigned(100000);
4954         if (notifybitsp != NULL && *notifybitsp != 0) {
4955                 *tl = txdr_unsigned(1);         /* One word of bits. */
4956                 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
4957                 *tl = txdr_unsigned(*notifybitsp);
4958         } else
4959                 *tl = txdr_unsigned(0);
4960         nd->nd_flag |= ND_USEGSSNAME;
4961         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4962             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4963         if (error != 0)
4964                 return (error);
4965         if (nd->nd_repstat == 0) {
4966                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4967                 if (layouttype != fxdr_unsigned(int, *tl))
4968                         printf("EEK! devinfo layout type not same!\n");
4969                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
4970                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
4971                         stripecnt = fxdr_unsigned(int, *tl);
4972                         NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
4973                         if (stripecnt < 1 || stripecnt > 4096) {
4974                                 printf("pNFS File layout devinfo stripecnt %d:"
4975                                     " out of range\n", stripecnt);
4976                                 error = NFSERR_BADXDR;
4977                                 goto nfsmout;
4978                         }
4979                         NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
4980                             NFSX_UNSIGNED);
4981                         addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
4982                         NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
4983                         if (addrcnt < 1 || addrcnt > 128) {
4984                                 printf("NFS devinfo addrcnt %d: out of range\n",
4985                                     addrcnt);
4986                                 error = NFSERR_BADXDR;
4987                                 goto nfsmout;
4988                         }
4989         
4990                         /*
4991                          * Now we know how many stripe indices and addresses, so
4992                          * we can allocate the structure the correct size.
4993                          */
4994                         i = (stripecnt * sizeof(uint8_t)) /
4995                             sizeof(struct nfsclds *) + 1;
4996                         NFSCL_DEBUG(4, "stripeindices=%d\n", i);
4997                         ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
4998                             sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
4999                             M_ZERO);
5000                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5001                             NFSX_V4DEVICEID);
5002                         ndi->nfsdi_refcnt = 0;
5003                         ndi->nfsdi_flags = NFSDI_FILELAYOUT;
5004                         ndi->nfsdi_stripecnt = stripecnt;
5005                         ndi->nfsdi_addrcnt = addrcnt;
5006                         /* Fill in the stripe indices. */
5007                         for (i = 0; i < stripecnt; i++) {
5008                                 stripeindex = fxdr_unsigned(uint8_t, *tl++);
5009                                 NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
5010                                 if (stripeindex >= addrcnt) {
5011                                         printf("pNFS File Layout devinfo"
5012                                             " stripeindex %d: too big\n",
5013                                             (int)stripeindex);
5014                                         error = NFSERR_BADXDR;
5015                                         goto nfsmout;
5016                                 }
5017                                 nfsfldi_setstripeindex(ndi, i, stripeindex);
5018                         }
5019                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
5020                         /* For Flex File, we only get one address list. */
5021                         ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
5022                             M_NFSDEVINFO, M_WAITOK | M_ZERO);
5023                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5024                             NFSX_V4DEVICEID);
5025                         ndi->nfsdi_refcnt = 0;
5026                         ndi->nfsdi_flags = NFSDI_FLEXFILE;
5027                         addrcnt = ndi->nfsdi_addrcnt = 1;
5028                 }
5029
5030                 /* Now, dissect the server address(es). */
5031                 safilled = AF_UNSPEC;
5032                 for (i = 0; i < addrcnt; i++) {
5033                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5034                         cnt = fxdr_unsigned(uint32_t, *tl);
5035                         if (cnt == 0) {
5036                                 printf("NFS devinfo 0 len addrlist\n");
5037                                 error = NFSERR_BADXDR;
5038                                 goto nfsmout;
5039                         }
5040                         dspp = nfsfldi_addr(ndi, i);
5041                         safilled = AF_UNSPEC;
5042                         for (j = 0; j < cnt; j++) {
5043                                 error = nfsv4_getipaddr(nd, &sin, &sin6, &af,
5044                                     &isudp);
5045                                 if (error != 0 && error != EPERM) {
5046                                         error = NFSERR_BADXDR;
5047                                         goto nfsmout;
5048                                 }
5049                                 if (error == 0 && isudp == 0) {
5050                                         /*
5051                                          * The priority is:
5052                                          * - Same address family.
5053                                          * Save the address and dspp, so that
5054                                          * the connection can be done after
5055                                          * parsing is complete.
5056                                          */
5057                                         if (safilled == AF_UNSPEC ||
5058                                             (af == nmp->nm_nam->sa_family &&
5059                                              safilled != nmp->nm_nam->sa_family)
5060                                            ) {
5061                                                 if (af == AF_INET)
5062                                                         ssin = sin;
5063                                                 else
5064                                                         ssin6 = sin6;
5065                                                 safilled = af;
5066                                                 gotdspp = dspp;
5067                                         }
5068                                 }
5069                         }
5070                 }
5071
5072                 gotvers = NFS_VER4;     /* Always NFSv4 for File Layout. */
5073                 /* For Flex File, we will take one of the versions to use. */
5074                 if (layouttype == NFSLAYOUT_FLEXFILE) {
5075                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5076                         j = fxdr_unsigned(int, *tl);
5077                         if (j < 1 || j > NFSDEV_MAXVERS) {
5078                                 printf("pNFS: too many versions\n");
5079                                 error = NFSERR_BADXDR;
5080                                 goto nfsmout;
5081                         }
5082                         gotvers = 0;
5083                         for (i = 0; i < j; i++) {
5084                                 NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
5085                                 vers = fxdr_unsigned(uint32_t, *tl++);
5086                                 minorvers = fxdr_unsigned(uint32_t, *tl++);
5087                                 if ((vers == NFS_VER4 && minorvers ==
5088                                     NFSV41_MINORVERSION) || (vers == NFS_VER3 &&
5089                                     gotvers == 0)) {
5090                                         gotvers = vers;
5091                                         /* We'll take this one. */
5092                                         ndi->nfsdi_versindex = i;
5093                                         ndi->nfsdi_vers = vers;
5094                                         ndi->nfsdi_minorvers = minorvers;
5095                                         ndi->nfsdi_rsize = fxdr_unsigned(
5096                                             uint32_t, *tl++);
5097                                         ndi->nfsdi_wsize = fxdr_unsigned(
5098                                             uint32_t, *tl++);
5099                                         if (*tl == newnfs_true)
5100                                                 ndi->nfsdi_flags |=
5101                                                     NFSDI_TIGHTCOUPLED;
5102                                         else
5103                                                 ndi->nfsdi_flags &=
5104                                                     ~NFSDI_TIGHTCOUPLED;
5105                                 }
5106                         }
5107                         if (gotvers == 0) {
5108                                 printf("pNFS: no NFSv3 or NFSv4.1\n");
5109                                 error = NFSERR_BADXDR;
5110                                 goto nfsmout;
5111                         }
5112                 }
5113
5114                 /* And the notify bits. */
5115                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5116                 bitcnt = fxdr_unsigned(int, *tl);
5117                 if (bitcnt > 0) {
5118                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5119                         if (notifybitsp != NULL)
5120                                 *notifybitsp =
5121                                     fxdr_unsigned(uint32_t, *tl);
5122                 }
5123                 if (safilled != AF_UNSPEC) {
5124                         KASSERT(ndi != NULL, ("ndi is NULL"));
5125                         *ndip = ndi;
5126                 } else
5127                         error = EPERM;
5128                 if (error == 0) {
5129                         /*
5130                          * Now we can do a TCP connection for the correct
5131                          * NFS version and IP address.
5132                          */
5133                         error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled,
5134                             gotvers, &dsp, p);
5135                 }
5136                 if (error == 0) {
5137                         KASSERT(gotdspp != NULL, ("gotdspp is NULL"));
5138                         *gotdspp = dsp;
5139                 }
5140         }
5141         if (nd->nd_repstat != 0 && error == 0)
5142                 error = nd->nd_repstat;
5143 nfsmout:
5144         if (error != 0 && ndi != NULL)
5145                 nfscl_freedevinfo(ndi);
5146         mbuf_freem(nd->nd_mrep);
5147         return (error);
5148 }
5149
5150 /*
5151  * Do the NFSv4.1 LayoutCommit.
5152  */
5153 int
5154 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5155     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5156     int layouttype, struct ucred *cred, NFSPROC_T *p, void *stuff)
5157 {
5158         uint32_t *tl;
5159         struct nfsrv_descript nfsd, *nd = &nfsd;
5160         int error;
5161
5162         nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL,
5163             0, 0);
5164         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5165             NFSX_STATEID);
5166         txdr_hyper(off, tl);
5167         tl += 2;
5168         txdr_hyper(len, tl);
5169         tl += 2;
5170         if (reclaim != 0)
5171                 *tl++ = newnfs_true;
5172         else
5173                 *tl++ = newnfs_false;
5174         *tl++ = txdr_unsigned(stateidp->seqid);
5175         *tl++ = stateidp->other[0];
5176         *tl++ = stateidp->other[1];
5177         *tl++ = stateidp->other[2];
5178         *tl++ = newnfs_true;
5179         if (lastbyte < off)
5180                 lastbyte = off;
5181         else if (lastbyte >= (off + len))
5182                 lastbyte = off + len - 1;
5183         txdr_hyper(lastbyte, tl);
5184         tl += 2;
5185         *tl++ = newnfs_false;
5186         *tl++ = txdr_unsigned(layouttype);
5187         /* All supported layouts are 0 length. */
5188         *tl = txdr_unsigned(0);
5189         nd->nd_flag |= ND_USEGSSNAME;
5190         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5191             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5192         if (error != 0)
5193                 return (error);
5194         error = nd->nd_repstat;
5195         mbuf_freem(nd->nd_mrep);
5196         return (error);
5197 }
5198
5199 /*
5200  * Do the NFSv4.1 LayoutReturn.
5201  */
5202 int
5203 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5204     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5205     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5206     void *stuff)
5207 {
5208         uint32_t *tl;
5209         struct nfsrv_descript nfsd, *nd = &nfsd;
5210         int error;
5211
5212         nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL,
5213             0, 0);
5214         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5215         if (reclaim != 0)
5216                 *tl++ = newnfs_true;
5217         else
5218                 *tl++ = newnfs_false;
5219         *tl++ = txdr_unsigned(layouttype);
5220         *tl++ = txdr_unsigned(iomode);
5221         *tl = txdr_unsigned(layoutreturn);
5222         if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5223                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5224                     NFSX_UNSIGNED);
5225                 txdr_hyper(offset, tl);
5226                 tl += 2;
5227                 txdr_hyper(len, tl);
5228                 tl += 2;
5229                 NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5230                 *tl++ = txdr_unsigned(stateidp->seqid);
5231                 *tl++ = stateidp->other[0];
5232                 *tl++ = stateidp->other[1];
5233                 *tl++ = stateidp->other[2];
5234                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
5235                         *tl = txdr_unsigned(0);
5236                 else if (layouttype == NFSLAYOUT_FLEXFILE) {
5237                         *tl = txdr_unsigned(2 * NFSX_UNSIGNED);
5238                         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5239                         /* No ioerrs or stats yet. */
5240                         *tl++ = 0;
5241                         *tl = 0;
5242                 }
5243         }
5244         nd->nd_flag |= ND_USEGSSNAME;
5245         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5246             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5247         if (error != 0)
5248                 return (error);
5249         if (nd->nd_repstat == 0) {
5250                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5251                 if (*tl != 0) {
5252                         NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5253                         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5254                         stateidp->other[0] = *tl++;
5255                         stateidp->other[1] = *tl++;
5256                         stateidp->other[2] = *tl;
5257                 }
5258         } else
5259                 error = nd->nd_repstat;
5260 nfsmout:
5261         mbuf_freem(nd->nd_mrep);
5262         return (error);
5263 }
5264
5265 /*
5266  * Acquire a layout and devinfo, if possible. The caller must have acquired
5267  * a reference count on the nfsclclient structure before calling this.
5268  * Return the layout in lypp with a reference count on it, if successful.
5269  */
5270 static int
5271 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5272     int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
5273     struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5274 {
5275         struct nfscllayout *lyp;
5276         struct nfsclflayout *flp;
5277         struct nfsclflayouthead flh;
5278         int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
5279         nfsv4stateid_t stateid;
5280         struct nfsclsession *tsep;
5281
5282         *lypp = NULL;
5283         if (NFSHASFLEXFILE(nmp))
5284                 layouttype = NFSLAYOUT_FLEXFILE;
5285         else
5286                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5287         /*
5288          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5289          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5290          * flp == NULL.
5291          */
5292         lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5293             off, &flp, &recalled);
5294         islocked = 0;
5295         if (lyp == NULL || flp == NULL) {
5296                 if (recalled != 0)
5297                         return (EIO);
5298                 LIST_INIT(&flh);
5299                 tsep = nfsmnt_mdssession(nmp);
5300                 layoutlen = tsep->nfsess_maxcache -
5301                     (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5302                 if (lyp == NULL) {
5303                         stateid.seqid = 0;
5304                         stateid.other[0] = stateidp->other[0];
5305                         stateid.other[1] = stateidp->other[1];
5306                         stateid.other[2] = stateidp->other[2];
5307                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5308                             nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5309                             (uint64_t)0, layouttype, layoutlen, &stateid,
5310                             &retonclose, &flh, cred, p, NULL);
5311                 } else {
5312                         islocked = 1;
5313                         stateid.seqid = lyp->nfsly_stateid.seqid;
5314                         stateid.other[0] = lyp->nfsly_stateid.other[0];
5315                         stateid.other[1] = lyp->nfsly_stateid.other[1];
5316                         stateid.other[2] = lyp->nfsly_stateid.other[2];
5317                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5318                             nfhp->nfh_len, iomode, off, UINT64_MAX,
5319                             (uint64_t)0, layouttype, layoutlen, &stateid,
5320                             &retonclose, &flh, cred, p, NULL);
5321                 }
5322                 error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5323                     nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5324                     &flh, layouttype, error, NULL, cred, p);
5325                 if (error == 0)
5326                         *lypp = lyp;
5327                 else if (islocked != 0)
5328                         nfscl_rellayout(lyp, 1);
5329         } else
5330                 *lypp = lyp;
5331         return (error);
5332 }
5333
5334 /*
5335  * Do a TCP connection plus exchange id and create session.
5336  * If successful, a "struct nfsclds" is linked into the list for the
5337  * mount point and a pointer to it is returned.
5338  */
5339 static int
5340 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
5341     struct sockaddr_in6 *sin6, sa_family_t af, int vers, struct nfsclds **dspp,
5342     NFSPROC_T *p)
5343 {
5344         struct sockaddr_in *msad, *sad;
5345         struct sockaddr_in6 *msad6, *sad6;
5346         struct nfsclclient *clp;
5347         struct nfssockreq *nrp;
5348         struct nfsclds *dsp, *tdsp;
5349         int error;
5350         enum nfsclds_state retv;
5351         uint32_t sequenceid;
5352
5353         KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5354             ("nfsrpc_fillsa: NULL nr_cred"));
5355         NFSLOCKCLSTATE();
5356         clp = nmp->nm_clp;
5357         NFSUNLOCKCLSTATE();
5358         if (clp == NULL)
5359                 return (EPERM);
5360         if (af == AF_INET) {
5361                 NFSLOCKMNT(nmp);
5362                 /*
5363                  * Check to see if we already have a session for this
5364                  * address that is usable for a DS.
5365                  * Note that the MDS's address is in a different place
5366                  * than the sessions already acquired for DS's.
5367                  */
5368                 msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5369                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
5370                 while (tdsp != NULL) {
5371                         if (msad != NULL && msad->sin_family == AF_INET &&
5372                             sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
5373                             sin->sin_port == msad->sin_port &&
5374                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5375                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
5376                                 *dspp = tdsp;
5377                                 NFSUNLOCKMNT(nmp);
5378                                 NFSCL_DEBUG(4, "fnd same addr\n");
5379                                 return (0);
5380                         }
5381                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5382                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5383                                 msad = (struct sockaddr_in *)
5384                                     tdsp->nfsclds_sockp->nr_nam;
5385                         else
5386                                 msad = NULL;
5387                 }
5388                 NFSUNLOCKMNT(nmp);
5389
5390                 /* No IP address match, so look for new/trunked one. */
5391                 sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
5392                 sad->sin_len = sizeof(*sad);
5393                 sad->sin_family = AF_INET;
5394                 sad->sin_port = sin->sin_port;
5395                 sad->sin_addr.s_addr = sin->sin_addr.s_addr;
5396                 nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5397                 nrp->nr_nam = (struct sockaddr *)sad;
5398         } else if (af == AF_INET6) {
5399                 NFSLOCKMNT(nmp);
5400                 /*
5401                  * Check to see if we already have a session for this
5402                  * address that is usable for a DS.
5403                  * Note that the MDS's address is in a different place
5404                  * than the sessions already acquired for DS's.
5405                  */
5406                 msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
5407                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
5408                 while (tdsp != NULL) {
5409                         if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
5410                             IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
5411                             &msad6->sin6_addr) &&
5412                             sin6->sin6_port == msad6->sin6_port &&
5413                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5414                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
5415                                 *dspp = tdsp;
5416                                 NFSUNLOCKMNT(nmp);
5417                                 return (0);
5418                         }
5419                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5420                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5421                                 msad6 = (struct sockaddr_in6 *)
5422                                     tdsp->nfsclds_sockp->nr_nam;
5423                         else
5424                                 msad6 = NULL;
5425                 }
5426                 NFSUNLOCKMNT(nmp);
5427
5428                 /* No IP address match, so look for new/trunked one. */
5429                 sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
5430                 sad6->sin6_len = sizeof(*sad6);
5431                 sad6->sin6_family = AF_INET6;
5432                 sad6->sin6_port = sin6->sin6_port;
5433                 NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
5434                     sizeof(struct in6_addr));
5435                 nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5436                 nrp->nr_nam = (struct sockaddr *)sad6;
5437         } else
5438                 return (EPERM);
5439
5440         nrp->nr_sotype = SOCK_STREAM;
5441         mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
5442         nrp->nr_prog = NFS_PROG;
5443         nrp->nr_vers = vers;
5444
5445         /*
5446          * Use the credentials that were used for the mount, which are
5447          * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
5448          * Ref. counting the credentials with crhold() is probably not
5449          * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
5450          * unmount, but I did it anyhow.
5451          */
5452         nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
5453         error = newnfs_connect(nmp, nrp, NULL, p, 0);
5454         NFSCL_DEBUG(3, "DS connect=%d\n", error);
5455
5456         dsp = NULL;
5457         /* Now, do the exchangeid and create session. */
5458         if (error == 0) {
5459                 if (vers == NFS_VER4) {
5460                         error = nfsrpc_exchangeid(nmp, clp, nrp,
5461                             NFSV4EXCH_USEPNFSDS, &dsp, nrp->nr_cred, p);
5462                         NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
5463                         if (error != 0)
5464                                 newnfs_disconnect(nrp);
5465                 } else {
5466                         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
5467                             M_WAITOK | M_ZERO);
5468                         dsp->nfsclds_flags |= NFSCLDS_DS;
5469                         dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
5470                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5471                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5472                             NULL, MTX_DEF);
5473                 }
5474         }
5475         if (error == 0) {
5476                 dsp->nfsclds_sockp = nrp;
5477                 if (vers == NFS_VER4) {
5478                         NFSLOCKMNT(nmp);
5479                         retv = nfscl_getsameserver(nmp, dsp, &tdsp);
5480                         NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
5481                         if (retv == NFSDSP_USETHISSESSION) {
5482                                 NFSUNLOCKMNT(nmp);
5483                                 /*
5484                                  * If there is already a session for this
5485                                  * server, use it.
5486                                  */
5487                                 (void)newnfs_disconnect(nrp);
5488                                 nfscl_freenfsclds(dsp);
5489                                 *dspp = tdsp;
5490                                 return (0);
5491                         }
5492                         if (retv == NFSDSP_SEQTHISSESSION)
5493                                 sequenceid =
5494                                     tdsp->nfsclds_sess.nfsess_sequenceid;
5495                         else
5496                                 sequenceid =
5497                                     dsp->nfsclds_sess.nfsess_sequenceid;
5498                         NFSUNLOCKMNT(nmp);
5499                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
5500                             nrp, sequenceid, 0, nrp->nr_cred, p);
5501                         NFSCL_DEBUG(3, "DS createsess=%d\n", error);
5502                 }
5503         } else {
5504                 NFSFREECRED(nrp->nr_cred);
5505                 NFSFREEMUTEX(&nrp->nr_mtx);
5506                 free(nrp->nr_nam, M_SONAME);
5507                 free(nrp, M_NFSSOCKREQ);
5508         }
5509         if (error == 0) {
5510                 NFSCL_DEBUG(3, "add DS session\n");
5511                 /*
5512                  * Put it at the end of the list. That way the list
5513                  * is ordered by when the entry was added. This matters
5514                  * since the one done first is the one that should be
5515                  * used for sequencid'ing any subsequent create sessions.
5516                  */
5517                 NFSLOCKMNT(nmp);
5518                 TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
5519                 NFSUNLOCKMNT(nmp);
5520                 *dspp = dsp;
5521         } else if (dsp != NULL) {
5522                 newnfs_disconnect(nrp);
5523                 nfscl_freenfsclds(dsp);
5524         }
5525         return (error);
5526 }
5527
5528 /*
5529  * Do the NFSv4.1 Reclaim Complete.
5530  */
5531 int
5532 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
5533 {
5534         uint32_t *tl;
5535         struct nfsrv_descript nfsd;
5536         struct nfsrv_descript *nd = &nfsd;
5537         int error;
5538
5539         nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0,
5540             0);
5541         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5542         *tl = newnfs_false;
5543         nd->nd_flag |= ND_USEGSSNAME;
5544         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5545             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5546         if (error != 0)
5547                 return (error);
5548         error = nd->nd_repstat;
5549         mbuf_freem(nd->nd_mrep);
5550         return (error);
5551 }
5552
5553 /*
5554  * Initialize the slot tables for a session.
5555  */
5556 static void
5557 nfscl_initsessionslots(struct nfsclsession *sep)
5558 {
5559         int i;
5560
5561         for (i = 0; i < NFSV4_CBSLOTS; i++) {
5562                 if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
5563                         m_freem(sep->nfsess_cbslots[i].nfssl_reply);
5564                 NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
5565         }
5566         for (i = 0; i < 64; i++)
5567                 sep->nfsess_slotseq[i] = 0;
5568         sep->nfsess_slots = 0;
5569 }
5570
5571 /*
5572  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
5573  */
5574 int
5575 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5576     uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
5577 {
5578         struct nfsnode *np = VTONFS(vp);
5579         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5580         struct nfscllayout *layp;
5581         struct nfscldevinfo *dip;
5582         struct nfsclflayout *rflp;
5583         struct mbuf *m;
5584         struct nfsclwritedsdorpc *drpc, *tdrpc;
5585         nfsv4stateid_t stateid;
5586         struct ucred *newcred;
5587         uint64_t lastbyte, len, off, oresid, xfer;
5588         int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo;
5589         void *lckp;
5590         uint8_t *dev;
5591         void *iovbase;
5592         size_t iovlen;
5593         off_t offs;
5594         ssize_t resid;
5595
5596         if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5597             (np->n_flag & NNOLAYOUT) != 0)
5598                 return (EIO);
5599         /* Now, get a reference cnt on the clientid for this mount. */
5600         if (nfscl_getref(nmp) == 0)
5601                 return (EIO);
5602
5603         /* Find an appropriate stateid. */
5604         newcred = NFSNEWCRED(cred);
5605         error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
5606             rwaccess, 1, newcred, p, &stateid, &lckp);
5607         if (error != 0) {
5608                 NFSFREECRED(newcred);
5609                 nfscl_relref(nmp);
5610                 return (error);
5611         }
5612         /* Search for a layout for this file. */
5613         off = uiop->uio_offset;
5614         layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
5615             np->n_fhp->nfh_len, off, &rflp, &recalled);
5616         if (layp == NULL || rflp == NULL) {
5617                 if (recalled != 0) {
5618                         NFSFREECRED(newcred);
5619                         nfscl_relref(nmp);
5620                         return (EIO);
5621                 }
5622                 if (layp != NULL) {
5623                         nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
5624                         layp = NULL;
5625                 }
5626                 /* Try and get a Layout, if it is supported. */
5627                 if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
5628                     (np->n_flag & NWRITEOPENED) != 0)
5629                         iolaymode = NFSLAYOUTIOMODE_RW;
5630                 else
5631                         iolaymode = NFSLAYOUTIOMODE_READ;
5632                 error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
5633                     NULL, &stateid, off, &layp, newcred, p);
5634                 if (error != 0) {
5635                         NFSLOCKNODE(np);
5636                         np->n_flag |= NNOLAYOUT;
5637                         NFSUNLOCKNODE(np);
5638                         if (lckp != NULL)
5639                                 nfscl_lockderef(lckp);
5640                         NFSFREECRED(newcred);
5641                         if (layp != NULL)
5642                                 nfscl_rellayout(layp, 0);
5643                         nfscl_relref(nmp);
5644                         return (error);
5645                 }
5646         }
5647
5648         /*
5649          * Loop around finding a layout that works for the first part of
5650          * this I/O operation, and then call the function that actually
5651          * does the RPC.
5652          */
5653         eof = 0;
5654         len = (uint64_t)uiop->uio_resid;
5655         while (len > 0 && error == 0 && eof == 0) {
5656                 off = uiop->uio_offset;
5657                 error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
5658                 if (error == 0) {
5659                         oresid = xfer = (uint64_t)uiop->uio_resid;
5660                         if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
5661                                 xfer = rflp->nfsfl_end - rflp->nfsfl_off;
5662                         /*
5663                          * For Flex File layout with mirrored DSs, select one
5664                          * of them at random for reads. For writes and commits,
5665                          * do all mirrors.
5666                          */
5667                         m = NULL;
5668                         drpc = NULL;
5669                         firstmirror = 0;
5670                         mirrorcnt = 1;
5671                         if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
5672                             (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
5673                                 if (rwaccess == NFSV4OPEN_ACCESSREAD) {
5674                                         firstmirror = arc4random() % mirrorcnt;
5675                                         mirrorcnt = firstmirror + 1;
5676                                 } else {
5677                                         if (docommit == 0) {
5678                                                 /*
5679                                                  * Save values, so uiop can be
5680                                                  * rolled back upon a write
5681                                                  * error.
5682                                                  */
5683                                                 offs = uiop->uio_offset;
5684                                                 resid = uiop->uio_resid;
5685                                                 iovbase =
5686                                                     uiop->uio_iov->iov_base;
5687                                                 iovlen = uiop->uio_iov->iov_len;
5688                                                 m = nfsm_uiombuflist(uiop, len,
5689                                                     NULL, NULL);
5690                                         }
5691                                         tdrpc = drpc = malloc(sizeof(*drpc) *
5692                                             (mirrorcnt - 1), M_TEMP, M_WAITOK |
5693                                             M_ZERO);
5694                                 }
5695                         }
5696                         for (i = firstmirror; i < mirrorcnt && error == 0; i++){
5697                                 if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0)
5698                                         dev = rflp->nfsfl_ffm[i].dev;
5699                                 else
5700                                         dev = rflp->nfsfl_dev;
5701                                 dip = nfscl_getdevinfo(nmp->nm_clp, dev,
5702                                     rflp->nfsfl_devp);
5703                                 if (dip != NULL) {
5704                                         if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
5705                                             != 0)
5706                                                 error = nfscl_dofflayoutio(vp,
5707                                                     uiop, iomode, must_commit,
5708                                                     &eof, &stateid, rwaccess,
5709                                                     dip, layp, rflp, off, xfer,
5710                                                     i, docommit, m, tdrpc,
5711                                                     newcred, p);
5712                                         else
5713                                                 error = nfscl_doflayoutio(vp,
5714                                                     uiop, iomode, must_commit,
5715                                                     &eof, &stateid, rwaccess,
5716                                                     dip, layp, rflp, off, xfer,
5717                                                     docommit, newcred, p);
5718                                         nfscl_reldevinfo(dip);
5719                                 } else
5720                                         error = EIO;
5721                                 tdrpc++;
5722                         }
5723                         if (m != NULL)
5724                                 m_freem(m);
5725                         tdrpc = drpc;
5726                         timo = hz / 50;         /* Wait for 20msec. */
5727                         if (timo < 1)
5728                                 timo = 1;
5729                         for (i = firstmirror; i < mirrorcnt - 1 &&
5730                             tdrpc != NULL; i++, tdrpc++) {
5731                                 /*
5732                                  * For the unused drpc entries, both inprog and
5733                                  * err == 0, so this loop won't break.
5734                                  */
5735                                 while (tdrpc->inprog != 0 && tdrpc->done == 0)
5736                                         tsleep(&tdrpc->tsk, PVFS, "clrpcio",
5737                                             timo);
5738                                 if (error == 0 && tdrpc->err != 0)
5739                                         error = tdrpc->err;
5740                         }
5741                         free(drpc, M_TEMP);
5742                         if (error == 0) {
5743                                 if (mirrorcnt > 1 && rwaccess ==
5744                                     NFSV4OPEN_ACCESSWRITE && docommit == 0) {
5745                                         NFSLOCKCLSTATE();
5746                                         layp->nfsly_flags |= NFSLY_WRITTEN;
5747                                         NFSUNLOCKCLSTATE();
5748                                 }
5749                                 lastbyte = off + xfer - 1;
5750                                 NFSLOCKCLSTATE();
5751                                 if (lastbyte > layp->nfsly_lastbyte)
5752                                         layp->nfsly_lastbyte = lastbyte;
5753                                 NFSUNLOCKCLSTATE();
5754                         } else if (error == NFSERR_OPENMODE &&
5755                             rwaccess == NFSV4OPEN_ACCESSREAD) {
5756                                 NFSLOCKMNT(nmp);
5757                                 nmp->nm_state |= NFSSTA_OPENMODE;
5758                                 NFSUNLOCKMNT(nmp);
5759                         } else
5760                                 error = EIO;
5761                         if (error == 0)
5762                                 len -= (oresid - (uint64_t)uiop->uio_resid);
5763                         else if (mirrorcnt > 1 && rwaccess ==
5764                             NFSV4OPEN_ACCESSWRITE && docommit == 0) {
5765                                 /*
5766                                  * In case the rpc gets retried, roll the
5767                                  * uio fields changed by nfsm_uiombuflist()
5768                                  * back.
5769                                  */
5770                                 uiop->uio_offset = offs;
5771                                 uiop->uio_resid = resid;
5772                                 uiop->uio_iov->iov_base = iovbase;
5773                                 uiop->uio_iov->iov_len = iovlen;
5774                         }
5775                 }
5776         }
5777         if (lckp != NULL)
5778                 nfscl_lockderef(lckp);
5779         NFSFREECRED(newcred);
5780         nfscl_rellayout(layp, 0);
5781         nfscl_relref(nmp);
5782         return (error);
5783 }
5784
5785 /*
5786  * Make a copy of the mbuf chain and add an mbuf for null padding, as required.
5787  */
5788 static struct mbuf *
5789 nfsm_copym(struct mbuf *m, int off, int xfer)
5790 {
5791         struct mbuf *m2, *m3, *m4;
5792         uint32_t *tl;
5793         int rem;
5794
5795         m2 = m_copym(m, off, xfer, M_WAITOK);
5796         rem = NFSM_RNDUP(xfer) - xfer;
5797         if (rem > 0) {
5798                 /*
5799                  * The zero padding to a multiple of 4 bytes is required by
5800                  * the XDR. So that the mbufs copied by reference aren't
5801                  * modified, add an mbuf with the zero'd bytes to the list.
5802                  * rem will be a maximum of 3, so one zero'd uint32_t is
5803                  * sufficient.
5804                  */
5805                 m3 = m2;
5806                 while (m3->m_next != NULL)
5807                         m3 = m3->m_next;
5808                 NFSMGET(m4);
5809                 tl = NFSMTOD(m4, uint32_t *);
5810                 *tl = 0;
5811                 mbuf_setlen(m4, rem);
5812                 mbuf_setnext(m3, m4);
5813         }
5814         return (m2);
5815 }
5816
5817 /*
5818  * Find a file layout that will handle the first bytes of the requested
5819  * range and return the information from it needed to the I/O operation.
5820  */
5821 int
5822 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
5823     struct nfsclflayout **retflpp)
5824 {
5825         struct nfsclflayout *flp, *nflp, *rflp;
5826         uint32_t rw;
5827
5828         rflp = NULL;
5829         rw = rwaccess;
5830         /* For reading, do the Read list first and then the Write list. */
5831         do {
5832                 if (rw == NFSV4OPEN_ACCESSREAD)
5833                         flp = LIST_FIRST(&lyp->nfsly_flayread);
5834                 else
5835                         flp = LIST_FIRST(&lyp->nfsly_flayrw);
5836                 while (flp != NULL) {
5837                         nflp = LIST_NEXT(flp, nfsfl_list);
5838                         if (flp->nfsfl_off > off)
5839                                 break;
5840                         if (flp->nfsfl_end > off &&
5841                             (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
5842                                 rflp = flp;
5843                         flp = nflp;
5844                 }
5845                 if (rw == NFSV4OPEN_ACCESSREAD)
5846                         rw = NFSV4OPEN_ACCESSWRITE;
5847                 else
5848                         rw = 0;
5849         } while (rw != 0);
5850         if (rflp != NULL) {
5851                 /* This one covers the most bytes starting at off. */
5852                 *retflpp = rflp;
5853                 return (0);
5854         }
5855         return (EIO);
5856 }
5857
5858 /*
5859  * Do I/O using an NFSv4.1 file layout.
5860  */
5861 static int
5862 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5863     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
5864     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
5865     uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
5866 {
5867         uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
5868         int commit_thru_mds, error, stripe_index, stripe_pos;
5869         struct nfsnode *np;
5870         struct nfsfh *fhp;
5871         struct nfsclds **dspp;
5872
5873         np = VTONFS(vp);
5874         rel_off = off - flp->nfsfl_patoff;
5875         stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff;
5876         stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
5877             dp->nfsdi_stripecnt;
5878         transfer = stripe_unit_size - (rel_off % stripe_unit_size);
5879         error = 0;
5880
5881         /* Loop around, doing I/O for each stripe unit. */
5882         while (len > 0 && error == 0) {
5883                 stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
5884                 dspp = nfsfldi_addr(dp, stripe_index);
5885                 if (len > transfer && docommit == 0)
5886                         xfer = transfer;
5887                 else
5888                         xfer = len;
5889                 if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
5890                         /* Dense layout. */
5891                         if (stripe_pos >= flp->nfsfl_fhcnt)
5892                                 return (EIO);
5893                         fhp = flp->nfsfl_fh[stripe_pos];
5894                         io_off = (rel_off / (stripe_unit_size *
5895                             dp->nfsdi_stripecnt)) * stripe_unit_size +
5896                             rel_off % stripe_unit_size;
5897                 } else {
5898                         /* Sparse layout. */
5899                         if (flp->nfsfl_fhcnt > 1) {
5900                                 if (stripe_index >= flp->nfsfl_fhcnt)
5901                                         return (EIO);
5902                                 fhp = flp->nfsfl_fh[stripe_index];
5903                         } else if (flp->nfsfl_fhcnt == 1)
5904                                 fhp = flp->nfsfl_fh[0];
5905                         else
5906                                 fhp = np->n_fhp;
5907                         io_off = off;
5908                 }
5909                 if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
5910                         commit_thru_mds = 1;
5911                         if (docommit != 0)
5912                                 error = EIO;
5913                 } else {
5914                         commit_thru_mds = 0;
5915                         mtx_lock(&np->n_mtx);
5916                         np->n_flag |= NDSCOMMIT;
5917                         mtx_unlock(&np->n_mtx);
5918                 }
5919                 if (docommit != 0) {
5920                         if (error == 0)
5921                                 error = nfsrpc_commitds(vp, io_off, xfer,
5922                                     *dspp, fhp, 0, 0, cred, p);
5923                         if (error == 0) {
5924                                 /*
5925                                  * Set both eof and uio_resid = 0 to end any
5926                                  * loops.
5927                                  */
5928                                 *eofp = 1;
5929                                 uiop->uio_resid = 0;
5930                         } else {
5931                                 mtx_lock(&np->n_mtx);
5932                                 np->n_flag &= ~NDSCOMMIT;
5933                                 mtx_unlock(&np->n_mtx);
5934                         }
5935                 } else if (rwflag == NFSV4OPEN_ACCESSREAD)
5936                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
5937                             io_off, xfer, fhp, 0, 0, 0, cred, p);
5938                 else {
5939                         error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
5940                             stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
5941                             0, 0, 0, cred, p);
5942                         if (error == 0) {
5943                                 NFSLOCKCLSTATE();
5944                                 lyp->nfsly_flags |= NFSLY_WRITTEN;
5945                                 NFSUNLOCKCLSTATE();
5946                         }
5947                 }
5948                 if (error == 0) {
5949                         transfer = stripe_unit_size;
5950                         stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
5951                         len -= xfer;
5952                         off += xfer;
5953                 }
5954         }
5955         return (error);
5956 }
5957
5958 /*
5959  * Do I/O using an NFSv4.1 flex file layout.
5960  */
5961 static int
5962 nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5963     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
5964     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
5965     uint64_t len, int mirror, int docommit, struct mbuf *mp,
5966     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
5967 {
5968         uint64_t transfer, xfer;
5969         int error, rel_off;
5970         struct nfsnode *np;
5971         struct nfsfh *fhp;
5972         struct nfsclds **dspp;
5973         struct ucred *tcred;
5974         struct mbuf *m;
5975
5976         np = VTONFS(vp);
5977         error = 0;
5978         rel_off = 0;
5979         NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
5980             (uintmax_t)len);
5981         /* Loop around, doing I/O for each stripe unit. */
5982         while (len > 0 && error == 0) {
5983                 dspp = nfsfldi_addr(dp, 0);
5984                 fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
5985                 stateidp = &flp->nfsfl_ffm[mirror].st;
5986                 NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
5987                     mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
5988                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
5989                         tcred = NFSNEWCRED(cred);
5990                         tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
5991                         tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
5992                         tcred->cr_ngroups = 1;
5993                 } else
5994                         tcred = cred;
5995                 if (rwflag == NFSV4OPEN_ACCESSREAD)
5996                         transfer = dp->nfsdi_rsize;
5997                 else
5998                         transfer = dp->nfsdi_wsize;
5999                 mtx_lock(&np->n_mtx);
6000                 np->n_flag |= NDSCOMMIT;
6001                 mtx_unlock(&np->n_mtx);
6002                 if (len > transfer && docommit == 0)
6003                         xfer = transfer;
6004                 else
6005                         xfer = len;
6006                 if (docommit != 0) {
6007                         if (error == 0) {
6008                                 /*
6009                                  * Do last mirrored DS commit with this thread.
6010                                  */
6011                                 if (mirror < flp->nfsfl_mirrorcnt - 1)
6012                                         error = nfsio_commitds(vp, off, xfer,
6013                                             *dspp, fhp, dp->nfsdi_vers,
6014                                             dp->nfsdi_minorvers, drpc, tcred,
6015                                             p);
6016                                 else
6017                                         error = nfsrpc_commitds(vp, off, xfer,
6018                                             *dspp, fhp, dp->nfsdi_vers,
6019                                             dp->nfsdi_minorvers, tcred, p);
6020                         }
6021                         NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error);
6022                         if (error == 0) {
6023                                 /*
6024                                  * Set both eof and uio_resid = 0 to end any
6025                                  * loops.
6026                                  */
6027                                 *eofp = 1;
6028                                 uiop->uio_resid = 0;
6029                         } else {
6030                                 mtx_lock(&np->n_mtx);
6031                                 np->n_flag &= ~NDSCOMMIT;
6032                                 mtx_unlock(&np->n_mtx);
6033                         }
6034                 } else if (rwflag == NFSV4OPEN_ACCESSREAD)
6035                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6036                             off, xfer, fhp, 1, dp->nfsdi_vers,
6037                             dp->nfsdi_minorvers, tcred, p);
6038                 else {
6039                         if (flp->nfsfl_mirrorcnt == 1) {
6040                                 error = nfsrpc_writeds(vp, uiop, iomode,
6041                                     must_commit, stateidp, *dspp, off, xfer,
6042                                     fhp, 0, 1, dp->nfsdi_vers,
6043                                     dp->nfsdi_minorvers, tcred, p);
6044                                 if (error == 0) {
6045                                         NFSLOCKCLSTATE();
6046                                         lyp->nfsly_flags |= NFSLY_WRITTEN;
6047                                         NFSUNLOCKCLSTATE();
6048                                 }
6049                         } else {
6050                                 m = nfsm_copym(mp, rel_off, xfer);
6051                                 NFSCL_DEBUG(4, "mcopy reloff=%d xfer=%jd\n",
6052                                     rel_off, (uintmax_t)xfer);
6053                                 /*
6054                                  * Do last write to a mirrored DS with this
6055                                  * thread.
6056                                  */
6057                                 if (mirror < flp->nfsfl_mirrorcnt - 1)
6058                                         error = nfsio_writedsmir(vp, iomode,
6059                                             must_commit, stateidp, *dspp, off,
6060                                             xfer, fhp, m, dp->nfsdi_vers,
6061                                             dp->nfsdi_minorvers, drpc, tcred,
6062                                             p);
6063                                 else
6064                                         error = nfsrpc_writedsmir(vp, iomode,
6065                                             must_commit, stateidp, *dspp, off,
6066                                             xfer, fhp, m, dp->nfsdi_vers,
6067                                             dp->nfsdi_minorvers, tcred, p);
6068                                 NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error);
6069                         }
6070                 }
6071                 NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
6072                 if (error == 0) {
6073                         len -= xfer;
6074                         off += xfer;
6075                         rel_off += xfer;
6076                 }
6077                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
6078                         NFSFREECRED(tcred);
6079         }
6080         NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
6081         return (error);
6082 }
6083
6084 /*
6085  * The actual read RPC done to a DS.
6086  */
6087 static int
6088 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
6089     struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
6090     int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
6091 {
6092         uint32_t *tl;
6093         int attrflag, error, retlen;
6094         struct nfsrv_descript nfsd;
6095         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
6096         struct nfsrv_descript *nd = &nfsd;
6097         struct nfssockreq *nrp;
6098         struct nfsvattr na;
6099
6100         nd->nd_mrep = NULL;
6101         if (vers == 0 || vers == NFS_VER4) {
6102                 nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
6103                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6104                 vers = NFS_VER4;
6105                 NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
6106                 if (flex != 0)
6107                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6108                 else
6109                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6110         } else {
6111                 nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
6112                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6113                 NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
6114         }
6115         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
6116         txdr_hyper(io_off, tl);
6117         *(tl + 2) = txdr_unsigned(len);
6118         nrp = dsp->nfsclds_sockp;
6119         NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
6120         if (nrp == NULL)
6121                 /* If NULL, use the MDS socket. */
6122                 nrp = &nmp->nm_sockreq;
6123         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6124             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6125         NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
6126             error);
6127         if (error != 0)
6128                 return (error);
6129         if (vers == NFS_VER3) {
6130                 error = nfscl_postop_attr(nd, &na, &attrflag, NULL);
6131                 NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
6132                 if (error != 0)
6133                         goto nfsmout;
6134         }
6135         if (nd->nd_repstat != 0) {
6136                 error = nd->nd_repstat;
6137                 goto nfsmout;
6138         }
6139         if (vers == NFS_VER3) {
6140                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6141                 *eofp = fxdr_unsigned(int, *(tl + 1));
6142         } else {
6143                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6144                 *eofp = fxdr_unsigned(int, *tl);
6145         }
6146         NFSM_STRSIZ(retlen, len);
6147         NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
6148         error = nfsm_mbufuio(nd, uiop, retlen);
6149 nfsmout:
6150         if (nd->nd_mrep != NULL)
6151                 mbuf_freem(nd->nd_mrep);
6152         return (error);
6153 }
6154
6155 /*
6156  * The actual write RPC done to a DS.
6157  */
6158 static int
6159 nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6160     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6161     struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
6162     struct ucred *cred, NFSPROC_T *p)
6163 {
6164         uint32_t *tl;
6165         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
6166         int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
6167         int32_t backup;
6168         struct nfsrv_descript nfsd;
6169         struct nfsrv_descript *nd = &nfsd;
6170         struct nfssockreq *nrp;
6171         struct nfsvattr na;
6172
6173         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
6174         nd->nd_mrep = NULL;
6175         if (vers == 0 || vers == NFS_VER4) {
6176                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6177                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6178                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
6179                 vers = NFS_VER4;
6180                 if (flex != 0)
6181                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6182                 else
6183                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6184                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6185         } else {
6186                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6187                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6188                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
6189                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6190         }
6191         txdr_hyper(io_off, tl);
6192         tl += 2;
6193         if (vers == NFS_VER3)
6194                 *tl++ = txdr_unsigned(len);
6195         *tl++ = txdr_unsigned(*iomode);
6196         *tl = txdr_unsigned(len);
6197         nfsm_uiombuf(nd, uiop, len);
6198         nrp = dsp->nfsclds_sockp;
6199         if (nrp == NULL)
6200                 /* If NULL, use the MDS socket. */
6201                 nrp = &nmp->nm_sockreq;
6202         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6203             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6204         NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
6205             nd->nd_repstat);
6206         if (error != 0)
6207                 return (error);
6208         if (nd->nd_repstat != 0) {
6209                 /*
6210                  * In case the rpc gets retried, roll
6211                  * the uio fileds changed by nfsm_uiombuf()
6212                  * back.
6213                  */
6214                 uiop->uio_offset -= len;
6215                 uio_uio_resid_add(uiop, len);
6216                 uio_iov_base_add(uiop, -len);
6217                 uio_iov_len_add(uiop, len);
6218                 error = nd->nd_repstat;
6219         } else {
6220                 if (vers == NFS_VER3) {
6221                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6222                             NULL);
6223                         NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
6224                         if (error != 0)
6225                                 goto nfsmout;
6226                 }
6227                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6228                 rlen = fxdr_unsigned(int, *tl++);
6229                 NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
6230                 if (rlen == 0) {
6231                         error = NFSERR_IO;
6232                         goto nfsmout;
6233                 } else if (rlen < len) {
6234                         backup = len - rlen;
6235                         uio_iov_base_add(uiop, -(backup));
6236                         uio_iov_len_add(uiop, backup);
6237                         uiop->uio_offset -= backup;
6238                         uio_uio_resid_add(uiop, backup);
6239                         len = rlen;
6240                 }
6241                 commit = fxdr_unsigned(int, *tl++);
6242
6243                 /*
6244                  * Return the lowest commitment level
6245                  * obtained by any of the RPCs.
6246                  */
6247                 if (committed == NFSWRITE_FILESYNC)
6248                         committed = commit;
6249                 else if (committed == NFSWRITE_DATASYNC &&
6250                     commit == NFSWRITE_UNSTABLE)
6251                         committed = commit;
6252                 if (commit_thru_mds != 0) {
6253                         NFSLOCKMNT(nmp);
6254                         if (!NFSHASWRITEVERF(nmp)) {
6255                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6256                                 NFSSETWRITEVERF(nmp);
6257                         } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
6258                                 *must_commit = 1;
6259                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6260                         }
6261                         NFSUNLOCKMNT(nmp);
6262                 } else {
6263                         NFSLOCKDS(dsp);
6264                         if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6265                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6266                                 dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6267                         } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6268                                 *must_commit = 1;
6269                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6270                         }
6271                         NFSUNLOCKDS(dsp);
6272                 }
6273         }
6274 nfsmout:
6275         if (nd->nd_mrep != NULL)
6276                 mbuf_freem(nd->nd_mrep);
6277         *iomode = committed;
6278         if (nd->nd_repstat != 0 && error == 0)
6279                 error = nd->nd_repstat;
6280         return (error);
6281 }
6282
6283 /*
6284  * The actual write RPC done to a DS.
6285  * This variant is called from a separate kernel process for mirrors.
6286  * Any short write is considered an IO error.
6287  */
6288 static int
6289 nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6290     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6291     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6292     struct ucred *cred, NFSPROC_T *p)
6293 {
6294         uint32_t *tl;
6295         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
6296         int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
6297         struct nfsrv_descript nfsd;
6298         struct nfsrv_descript *nd = &nfsd;
6299         struct nfssockreq *nrp;
6300         struct nfsvattr na;
6301
6302         nd->nd_mrep = NULL;
6303         if (vers == 0 || vers == NFS_VER4) {
6304                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6305                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6306                 vers = NFS_VER4;
6307                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
6308                     minorvers);
6309                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6310                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6311         } else {
6312                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6313                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6314                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
6315                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6316         }
6317         txdr_hyper(io_off, tl);
6318         tl += 2;
6319         if (vers == NFS_VER3)
6320                 *tl++ = txdr_unsigned(len);
6321         *tl++ = txdr_unsigned(*iomode);
6322         *tl = txdr_unsigned(len);
6323         if (len > 0) {
6324                 /* Put data in mbuf chain. */
6325                 nd->nd_mb->m_next = m;
6326                 /* Set nd_mb and nd_bpos to end of data. */
6327                 while (m->m_next != NULL)
6328                         m = m->m_next;
6329                 nd->nd_mb = m;
6330                 nd->nd_bpos = mtod(m, char *) + m->m_len;
6331                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: lastmb len=%d\n", m->m_len);
6332         }
6333         nrp = dsp->nfsclds_sockp;
6334         if (nrp == NULL)
6335                 /* If NULL, use the MDS socket. */
6336                 nrp = &nmp->nm_sockreq;
6337         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6338             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6339         NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
6340             nd->nd_repstat);
6341         if (error != 0)
6342                 return (error);
6343         if (nd->nd_repstat != 0)
6344                 error = nd->nd_repstat;
6345         else {
6346                 if (vers == NFS_VER3) {
6347                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6348                             NULL);
6349                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
6350                             error);
6351                         if (error != 0)
6352                                 goto nfsmout;
6353                 }
6354                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6355                 rlen = fxdr_unsigned(int, *tl++);
6356                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
6357                     rlen);
6358                 if (rlen != len) {
6359                         error = NFSERR_IO;
6360                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
6361                             len, rlen);
6362                         goto nfsmout;
6363                 }
6364                 commit = fxdr_unsigned(int, *tl++);
6365
6366                 /*
6367                  * Return the lowest commitment level
6368                  * obtained by any of the RPCs.
6369                  */
6370                 if (committed == NFSWRITE_FILESYNC)
6371                         committed = commit;
6372                 else if (committed == NFSWRITE_DATASYNC &&
6373                     commit == NFSWRITE_UNSTABLE)
6374                         committed = commit;
6375                 NFSLOCKDS(dsp);
6376                 if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6377                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6378                         dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6379                 } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6380                         *must_commit = 1;
6381                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6382                 }
6383                 NFSUNLOCKDS(dsp);
6384         }
6385 nfsmout:
6386         if (nd->nd_mrep != NULL)
6387                 mbuf_freem(nd->nd_mrep);
6388         *iomode = committed;
6389         if (nd->nd_repstat != 0 && error == 0)
6390                 error = nd->nd_repstat;
6391         return (error);
6392 }
6393
6394 /*
6395  * Start up the thread that will execute nfsrpc_writedsmir().
6396  */
6397 static void
6398 start_writedsmir(void *arg, int pending)
6399 {
6400         struct nfsclwritedsdorpc *drpc;
6401
6402         drpc = (struct nfsclwritedsdorpc *)arg;
6403         drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode,
6404             &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len,
6405             drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred,
6406             drpc->p);
6407         drpc->done = 1;
6408         NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err);
6409 }
6410
6411 /*
6412  * Set up the write DS mirror call for the pNFS I/O thread.
6413  */
6414 static int
6415 nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6416     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len,
6417     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6418     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6419 {
6420         int error, ret;
6421
6422         error = 0;
6423         drpc->done = 0;
6424         drpc->vp = vp;
6425         drpc->iomode = *iomode;
6426         drpc->must_commit = *must_commit;
6427         drpc->stateidp = stateidp;
6428         drpc->dsp = dsp;
6429         drpc->off = off;
6430         drpc->len = len;
6431         drpc->fhp = fhp;
6432         drpc->m = m;
6433         drpc->vers = vers;
6434         drpc->minorvers = minorvers;
6435         drpc->cred = cred;
6436         drpc->p = p;
6437         drpc->inprog = 0;
6438         ret = EIO;
6439         if (nfs_pnfsiothreads != 0) {
6440                 ret = nfs_pnfsio(start_writedsmir, drpc);
6441                 NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret);
6442         }
6443         if (ret != 0)
6444                 error = nfsrpc_writedsmir(vp, iomode, must_commit, stateidp,
6445                     dsp, off, len, fhp, m, vers, minorvers, cred, p);
6446         NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error);
6447         return (error);
6448 }
6449
6450 /*
6451  * Free up the nfsclds structure.
6452  */
6453 void
6454 nfscl_freenfsclds(struct nfsclds *dsp)
6455 {
6456         int i;
6457
6458         if (dsp == NULL)
6459                 return;
6460         if (dsp->nfsclds_sockp != NULL) {
6461                 NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
6462                 NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
6463                 free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
6464                 free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
6465         }
6466         NFSFREEMUTEX(&dsp->nfsclds_mtx);
6467         NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
6468         for (i = 0; i < NFSV4_CBSLOTS; i++) {
6469                 if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
6470                         m_freem(
6471                             dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
6472         }
6473         free(dsp, M_NFSCLDS);
6474 }
6475
6476 static enum nfsclds_state
6477 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
6478     struct nfsclds **retdspp)
6479 {
6480         struct nfsclds *dsp, *cur_dsp;
6481
6482         /*
6483          * Search the list of nfsclds structures for one with the same
6484          * server.
6485          */
6486         cur_dsp = NULL;
6487         TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
6488                 if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
6489                     dsp->nfsclds_servownlen != 0 &&
6490                     !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
6491                     dsp->nfsclds_servownlen) &&
6492                     dsp->nfsclds_sess.nfsess_defunct == 0) {
6493                         NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
6494                             TAILQ_FIRST(&nmp->nm_sess), dsp,
6495                             dsp->nfsclds_flags);
6496                         /* Server major id matches. */
6497                         if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
6498                                 *retdspp = dsp;
6499                                 return (NFSDSP_USETHISSESSION);
6500                         }
6501
6502                         /*
6503                          * Note the first match, so it can be used for
6504                          * sequence'ing new sessions.
6505                          */
6506                         if (cur_dsp == NULL)
6507                                 cur_dsp = dsp;
6508                 }
6509         }
6510         if (cur_dsp != NULL) {
6511                 *retdspp = cur_dsp;
6512                 return (NFSDSP_SEQTHISSESSION);
6513         }
6514         return (NFSDSP_NOTFOUND);
6515 }
6516
6517 /*
6518  * NFS commit rpc to a NFSv4.1 DS.
6519  */
6520 static int
6521 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
6522     struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
6523     NFSPROC_T *p)
6524 {
6525         uint32_t *tl;
6526         struct nfsrv_descript nfsd, *nd = &nfsd;
6527         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
6528         struct nfssockreq *nrp;
6529         struct nfsvattr na;
6530         int attrflag, error;
6531         
6532         nd->nd_mrep = NULL;
6533         if (vers == 0 || vers == NFS_VER4) {
6534                 nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,
6535                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6536                 vers = NFS_VER4;
6537         } else
6538                 nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh,
6539                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6540         NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers,
6541             minorvers);
6542         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
6543         txdr_hyper(offset, tl);
6544         tl += 2;
6545         *tl = txdr_unsigned(cnt);
6546         nrp = dsp->nfsclds_sockp;
6547         if (nrp == NULL)
6548                 /* If NULL, use the MDS socket. */
6549                 nrp = &nmp->nm_sockreq;
6550         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6551             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6552         NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error,
6553             nd->nd_repstat);
6554         if (error != 0)
6555                 return (error);
6556         if (nd->nd_repstat == 0) {
6557                 if (vers == NFS_VER3) {
6558                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6559                             NULL);
6560                         NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error);
6561                         if (error != 0)
6562                                 goto nfsmout;
6563                 }
6564                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
6565                 NFSLOCKDS(dsp);
6566                 if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6567                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6568                         error = NFSERR_STALEWRITEVERF;
6569                 }
6570                 NFSUNLOCKDS(dsp);
6571         }
6572 nfsmout:
6573         if (error == 0 && nd->nd_repstat != 0)
6574                 error = nd->nd_repstat;
6575         mbuf_freem(nd->nd_mrep);
6576         return (error);
6577 }
6578
6579 /*
6580  * Start up the thread that will execute nfsrpc_commitds().
6581  */
6582 static void
6583 start_commitds(void *arg, int pending)
6584 {
6585         struct nfsclwritedsdorpc *drpc;
6586
6587         drpc = (struct nfsclwritedsdorpc *)arg;
6588         drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len,
6589             drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred,
6590             drpc->p);
6591         drpc->done = 1;
6592         NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err);
6593 }
6594
6595 /*
6596  * Set up the commit DS mirror call for the pNFS I/O thread.
6597  */
6598 static int
6599 nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
6600     struct nfsfh *fhp, int vers, int minorvers,
6601     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6602 {
6603         int error, ret;
6604
6605         error = 0;
6606         drpc->done = 0;
6607         drpc->vp = vp;
6608         drpc->off = offset;
6609         drpc->len = cnt;
6610         drpc->dsp = dsp;
6611         drpc->fhp = fhp;
6612         drpc->vers = vers;
6613         drpc->minorvers = minorvers;
6614         drpc->cred = cred;
6615         drpc->p = p;
6616         drpc->inprog = 0;
6617         ret = EIO;
6618         if (nfs_pnfsiothreads != 0) {
6619                 ret = nfs_pnfsio(start_commitds, drpc);
6620                 NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret);
6621         }
6622         if (ret != 0)
6623                 error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers,
6624                     minorvers, cred, p);
6625         NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error);
6626         return (error);
6627 }
6628
6629 /*
6630  * Set up the XDR arguments for the LayoutGet operation.
6631  */
6632 static void
6633 nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
6634     uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype,
6635     int layoutlen, int usecurstateid)
6636 {
6637         uint32_t *tl;
6638
6639         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
6640             NFSX_STATEID);
6641         *tl++ = newnfs_false;           /* Don't signal availability. */
6642         *tl++ = txdr_unsigned(layouttype);
6643         *tl++ = txdr_unsigned(iomode);
6644         txdr_hyper(offset, tl);
6645         tl += 2;
6646         txdr_hyper(len, tl);
6647         tl += 2;
6648         txdr_hyper(minlen, tl);
6649         tl += 2;
6650         if (usecurstateid != 0) {
6651                 /* Special stateid for Current stateid. */
6652                 *tl++ = txdr_unsigned(1);
6653                 *tl++ = 0;
6654                 *tl++ = 0;
6655                 *tl++ = 0;
6656         } else {
6657                 *tl++ = txdr_unsigned(stateidp->seqid);
6658                 NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
6659                 *tl++ = stateidp->other[0];
6660                 *tl++ = stateidp->other[1];
6661                 *tl++ = stateidp->other[2];
6662         }
6663         *tl = txdr_unsigned(layoutlen);
6664 }
6665
6666 /*
6667  * Parse the reply for a successful LayoutGet operation.
6668  */
6669 static int
6670 nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6671     int *retonclosep, struct nfsclflayouthead *flhp)
6672 {
6673         uint32_t *tl;
6674         struct nfsclflayout *flp, *prevflp, *tflp;
6675         int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen;
6676         int m, mirrorcnt;
6677         uint64_t retlen, off;
6678         struct nfsfh *nfhp;
6679         uint8_t *cp;
6680         uid_t user;
6681         gid_t grp;
6682
6683         NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n");
6684         error = 0;
6685         flp = NULL;
6686         gotiomode = -1;
6687         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
6688         if (*tl++ != 0)
6689                 *retonclosep = 1;
6690         else
6691                 *retonclosep = 0;
6692         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
6693         NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
6694             (int)stateidp->seqid);
6695         stateidp->other[0] = *tl++;
6696         stateidp->other[1] = *tl++;
6697         stateidp->other[2] = *tl++;
6698         cnt = fxdr_unsigned(int, *tl);
6699         NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
6700         if (cnt <= 0 || cnt > 10000) {
6701                 /* Don't accept more than 10000 layouts in reply. */
6702                 error = NFSERR_BADXDR;
6703                 goto nfsmout;
6704         }
6705         for (i = 0; i < cnt; i++) {
6706                 /* Dissect to the layout type. */
6707                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER +
6708                     3 * NFSX_UNSIGNED);
6709                 off = fxdr_hyper(tl); tl += 2;
6710                 retlen = fxdr_hyper(tl); tl += 2;
6711                 iomode = fxdr_unsigned(int, *tl++);
6712                 laytype = fxdr_unsigned(int, *tl);
6713                 NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype,
6714                     (uintmax_t)off, (uintmax_t)retlen, iomode);
6715                 /* Ignore length of layout body for now. */
6716                 if (laytype == NFSLAYOUT_NFSV4_1_FILES) {
6717                         /* Parse the File layout up to fhcnt. */
6718                         NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED +
6719                             NFSX_HYPER + NFSX_V4DEVICEID);
6720                         fhcnt = fxdr_unsigned(int, *(tl + 4 +
6721                             NFSX_V4DEVICEID / NFSX_UNSIGNED));
6722                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
6723                         if (fhcnt < 0 || fhcnt > 100) {
6724                                 /* Don't accept more than 100 file handles. */
6725                                 error = NFSERR_BADXDR;
6726                                 goto nfsmout;
6727                         }
6728                         if (fhcnt > 0)
6729                                 flp = malloc(sizeof(*flp) + fhcnt *
6730                                     sizeof(struct nfsfh *), M_NFSFLAYOUT,
6731                                     M_WAITOK);
6732                         else
6733                                 flp = malloc(sizeof(*flp), M_NFSFLAYOUT,
6734                                     M_WAITOK);
6735                         flp->nfsfl_flags = NFSFL_FILE;
6736                         flp->nfsfl_fhcnt = 0;
6737                         flp->nfsfl_devp = NULL;
6738                         flp->nfsfl_off = off;
6739                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
6740                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
6741                         else
6742                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
6743                         flp->nfsfl_iomode = iomode;
6744                         if (gotiomode == -1)
6745                                 gotiomode = flp->nfsfl_iomode;
6746                         /* Ignore layout body length for now. */
6747                         NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
6748                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
6749                         flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
6750                         NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
6751                         flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
6752                         flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
6753                         NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n",
6754                             flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff);
6755                         for (j = 0; j < fhcnt; j++) {
6756                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6757                                 nfhlen = fxdr_unsigned(int, *tl);
6758                                 if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
6759                                         error = NFSERR_BADXDR;
6760                                         goto nfsmout;
6761                                 }
6762                                 nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
6763                                     M_NFSFH, M_WAITOK);
6764                                 flp->nfsfl_fh[j] = nfhp;
6765                                 flp->nfsfl_fhcnt++;
6766                                 nfhp->nfh_len = nfhlen;
6767                                 NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
6768                                 NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
6769                         }
6770                 } else if (laytype == NFSLAYOUT_FLEXFILE) {
6771                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED +
6772                             NFSX_HYPER);
6773                         mirrorcnt = fxdr_unsigned(int, *(tl + 2));
6774                         NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt);
6775                         if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) {
6776                                 error = NFSERR_BADXDR;
6777                                 goto nfsmout;
6778                         }
6779                         flp = malloc(sizeof(*flp) + mirrorcnt *
6780                             sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK);
6781                         flp->nfsfl_flags = NFSFL_FLEXFILE;
6782                         flp->nfsfl_mirrorcnt = mirrorcnt;
6783                         flp->nfsfl_devp = NULL;
6784                         flp->nfsfl_off = off;
6785                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
6786                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
6787                         else
6788                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
6789                         flp->nfsfl_iomode = iomode;
6790                         if (gotiomode == -1)
6791                                 gotiomode = flp->nfsfl_iomode;
6792                         flp->nfsfl_stripeunit = fxdr_hyper(tl);
6793                         NFSCL_DEBUG(4, "stripeunit=%ju\n",
6794                             (uintmax_t)flp->nfsfl_stripeunit);
6795                         for (j = 0; j < mirrorcnt; j++) {
6796                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6797                                 k = fxdr_unsigned(int, *tl);
6798                                 if (k < 1 || k > 128) {
6799                                         error = NFSERR_BADXDR;
6800                                         goto nfsmout;
6801                                 }
6802                                 NFSCL_DEBUG(4, "servercnt=%d\n", k);
6803                                 for (l = 0; l < k; l++) {
6804                                         NFSM_DISSECT(tl, uint32_t *,
6805                                             NFSX_V4DEVICEID + NFSX_STATEID +
6806                                             2 * NFSX_UNSIGNED);
6807                                         if (l == 0) {
6808                                                 /* Just use the first server. */
6809                                                 NFSBCOPY(tl,
6810                                                     flp->nfsfl_ffm[j].dev,
6811                                                     NFSX_V4DEVICEID);
6812                                                 tl += (NFSX_V4DEVICEID /
6813                                                     NFSX_UNSIGNED);
6814                                                 tl++;
6815                                                 flp->nfsfl_ffm[j].st.seqid =
6816                                                     *tl++;
6817                                                 flp->nfsfl_ffm[j].st.other[0] =
6818                                                     *tl++;
6819                                                 flp->nfsfl_ffm[j].st.other[1] =
6820                                                     *tl++;
6821                                                 flp->nfsfl_ffm[j].st.other[2] =
6822                                                     *tl++;
6823                                                 NFSCL_DEBUG(4, "st.seqid=%u "
6824                                                  "st.o0=0x%x st.o1=0x%x "
6825                                                  "st.o2=0x%x\n",
6826                                                  flp->nfsfl_ffm[j].st.seqid,
6827                                                  flp->nfsfl_ffm[j].st.other[0],
6828                                                  flp->nfsfl_ffm[j].st.other[1],
6829                                                  flp->nfsfl_ffm[j].st.other[2]);
6830                                         } else
6831                                                 tl += ((NFSX_V4DEVICEID +
6832                                                     NFSX_STATEID +
6833                                                     NFSX_UNSIGNED) /
6834                                                     NFSX_UNSIGNED);
6835                                         fhcnt = fxdr_unsigned(int, *tl);
6836                                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
6837                                         if (fhcnt < 1 ||
6838                                             fhcnt > NFSDEV_MAXVERS) {
6839                                                 error = NFSERR_BADXDR;
6840                                                 goto nfsmout;
6841                                         }
6842                                         for (m = 0; m < fhcnt; m++) {
6843                                                 NFSM_DISSECT(tl, uint32_t *,
6844                                                     NFSX_UNSIGNED);
6845                                                 nfhlen = fxdr_unsigned(int,
6846                                                     *tl);
6847                                                 NFSCL_DEBUG(4, "nfhlen=%d\n",
6848                                                     nfhlen);
6849                                                 if (nfhlen <= 0 || nfhlen >
6850                                                     NFSX_V4FHMAX) {
6851                                                         error = NFSERR_BADXDR;
6852                                                         goto nfsmout;
6853                                                 }
6854                                                 NFSM_DISSECT(cp, uint8_t *,
6855                                                     NFSM_RNDUP(nfhlen));
6856                                                 if (l == 0) {
6857                                                         flp->nfsfl_ffm[j].fhcnt 
6858                                                             = fhcnt;
6859                                                         nfhp = malloc(
6860                                                             sizeof(*nfhp) +
6861                                                             nfhlen - 1, M_NFSFH,
6862                                                             M_WAITOK);
6863                                                         flp->nfsfl_ffm[j].fh[m]
6864                                                             = nfhp;
6865                                                         nfhp->nfh_len = nfhlen;
6866                                                         NFSBCOPY(cp,
6867                                                             nfhp->nfh_fh,
6868                                                             nfhlen);
6869                                                         NFSCL_DEBUG(4,
6870                                                             "got fh\n");
6871                                                 }
6872                                         }
6873                                         /* Now, get the ffsd_user/ffds_group. */
6874                                         error = nfsrv_parseug(nd, 0, &user,
6875                                             &grp, curthread);
6876                                         NFSCL_DEBUG(4, "after parseu=%d\n",
6877                                             error);
6878                                         if (error == 0)
6879                                                 error = nfsrv_parseug(nd, 1,
6880                                                     &user, &grp, curthread);
6881                                         NFSCL_DEBUG(4, "aft parseg=%d\n",
6882                                             grp);
6883                                         if (error != 0)
6884                                                 goto nfsmout;
6885                                         NFSCL_DEBUG(4, "user=%d group=%d\n",
6886                                             user, grp);
6887                                         if (l == 0) {
6888                                                 flp->nfsfl_ffm[j].user = user;
6889                                                 flp->nfsfl_ffm[j].group = grp;
6890                                                 NFSCL_DEBUG(4,
6891                                                     "usr=%d grp=%d\n", user,
6892                                                     grp);
6893                                         }
6894                                 }
6895                         }
6896                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6897                         flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++);
6898                         flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl);
6899                         NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n",
6900                             flp->nfsfl_fflags, flp->nfsfl_statshint);
6901                 } else {
6902                         error = NFSERR_BADXDR;
6903                         goto nfsmout;
6904                 }
6905                 if (flp->nfsfl_iomode == gotiomode) {
6906                         /* Keep the list in increasing offset order. */
6907                         tflp = LIST_FIRST(flhp);
6908                         prevflp = NULL;
6909                         while (tflp != NULL &&
6910                             tflp->nfsfl_off < flp->nfsfl_off) {
6911                                 prevflp = tflp;
6912                                 tflp = LIST_NEXT(tflp, nfsfl_list);
6913                         }
6914                         if (prevflp == NULL)
6915                                 LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
6916                         else
6917                                 LIST_INSERT_AFTER(prevflp, flp,
6918                                     nfsfl_list);
6919                         NFSCL_DEBUG(4, "flp inserted\n");
6920                 } else {
6921                         printf("nfscl_layoutget(): got wrong iomode\n");
6922                         nfscl_freeflayout(flp);
6923                 }
6924                 flp = NULL;
6925         }
6926 nfsmout:
6927         NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error);
6928         if (error != 0 && flp != NULL)
6929                 nfscl_freeflayout(flp);
6930         return (error);
6931 }
6932
6933 /*
6934  * Parse a user/group digit string.
6935  */
6936 static int
6937 nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
6938     NFSPROC_T *p)
6939 {
6940         uint32_t *tl;
6941         char *cp, *str, str0[NFSV4_SMALLSTR + 1];
6942         uint32_t len = 0;
6943         int error = 0;
6944
6945         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6946         len = fxdr_unsigned(uint32_t, *tl);
6947         str = NULL;
6948         if (len > NFSV4_OPAQUELIMIT) {
6949                 error = NFSERR_BADXDR;
6950                 goto nfsmout;
6951         }
6952         NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len);
6953         if (len == 0) {
6954                 if (dogrp != 0)
6955                         *gidp = GID_NOGROUP;
6956                 else
6957                         *uidp = UID_NOBODY;
6958                 return (0);
6959         }
6960         if (len > NFSV4_SMALLSTR)
6961                 str = malloc(len + 1, M_TEMP, M_WAITOK);
6962         else
6963                 str = str0;
6964         NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
6965         NFSBCOPY(cp, str, len);
6966         str[len] = '\0';
6967         NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
6968         if (dogrp != 0)
6969                 error = nfsv4_strtogid(nd, str, len, gidp, p);
6970         else
6971                 error = nfsv4_strtouid(nd, str, len, uidp, p);
6972 nfsmout:
6973         if (len > NFSV4_SMALLSTR)
6974                 free(str, M_TEMP);
6975         NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error);
6976         return (error);
6977 }
6978
6979 /*
6980  * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
6981  * so that it does both an Open and a Layoutget.
6982  */
6983 static int
6984 nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
6985     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
6986     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
6987     struct ucred *cred, NFSPROC_T *p)
6988 {
6989         struct nfscllayout *lyp;
6990         struct nfsclflayout *flp;
6991         struct nfsclflayouthead flh;
6992         int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
6993         int layouttype, laystat;
6994         nfsv4stateid_t stateid;
6995         struct nfsclsession *tsep;
6996
6997         error = 0;
6998         if (NFSHASFLEXFILE(nmp))
6999                 layouttype = NFSLAYOUT_FLEXFILE;
7000         else
7001                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
7002         /*
7003          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
7004          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
7005          * flp == NULL.
7006          */
7007         lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp,
7008             &recalled);
7009         NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
7010         if (lyp == NULL)
7011                 islocked = 0;
7012         else if (flp != NULL)
7013                 islocked = 1;
7014         else
7015                 islocked = 2;
7016         if ((lyp == NULL || flp == NULL) && recalled == 0) {
7017                 LIST_INIT(&flh);
7018                 tsep = nfsmnt_mdssession(nmp);
7019                 layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
7020                     3 * NFSX_UNSIGNED);
7021                 if (lyp == NULL)
7022                         usecurstateid = 1;
7023                 else {
7024                         usecurstateid = 0;
7025                         stateid.seqid = lyp->nfsly_stateid.seqid;
7026                         stateid.other[0] = lyp->nfsly_stateid.other[0];
7027                         stateid.other[1] = lyp->nfsly_stateid.other[1];
7028                         stateid.other[2] = lyp->nfsly_stateid.other[2];
7029                 }
7030                 error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
7031                     newfhp, newfhlen, mode, op, name, namelen,
7032                     dpp, &stateid, usecurstateid, layouttype, layoutlen,
7033                     &retonclose, &flh, &laystat, cred, p);
7034                 NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
7035                     laystat, error);
7036                 laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
7037                     &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat,
7038                     &islocked, cred, p);
7039         } else
7040                 error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
7041                     mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
7042         if (islocked == 2)
7043                 nfscl_rellayout(lyp, 1);
7044         else if (islocked == 1)
7045                 nfscl_rellayout(lyp, 0);
7046         return (error);
7047 }
7048
7049 /*
7050  * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
7051  * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
7052  * handled by nfsrpc_openrpc().
7053  * For the case where op == NULL, dvp is the directory.  When op != NULL, it
7054  * can be NULL.
7055  */
7056 static int
7057 nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7058     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7059     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7060     nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
7061     int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
7062     int *laystatp, struct ucred *cred, NFSPROC_T *p)
7063 {
7064         uint32_t *tl;
7065         struct nfsrv_descript nfsd, *nd = &nfsd;
7066         struct nfscldeleg *ndp = NULL;
7067         struct nfsvattr nfsva;
7068         struct nfsclsession *tsep;
7069         uint32_t rflags, deleg;
7070         nfsattrbit_t attrbits;
7071         int error, ret, acesize, limitby, iomode;
7072
7073         *dpp = NULL;
7074         *laystatp = ENXIO;
7075         nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
7076             0, 0);
7077         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
7078         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
7079         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
7080         *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
7081         tsep = nfsmnt_mdssession(nmp);
7082         *tl++ = tsep->nfsess_clientid.lval[0];
7083         *tl = tsep->nfsess_clientid.lval[1];
7084         nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7085         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7086         *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
7087         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7088         nfsm_strtom(nd, name, namelen);
7089         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7090         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7091         NFSZERO_ATTRBIT(&attrbits);
7092         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
7093         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
7094         nfsrv_putattrbit(nd, &attrbits);
7095         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7096         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7097         if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
7098                 iomode = NFSLAYOUTIOMODE_RW;
7099         else
7100                 iomode = NFSLAYOUTIOMODE_READ;
7101         nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
7102             layouttype, layoutlen, usecurstateid);
7103         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
7104             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
7105         if (error != 0)
7106                 return (error);
7107         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
7108         if (nd->nd_repstat != 0)
7109                 *laystatp = nd->nd_repstat;
7110         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7111                 /* ND_NOMOREDATA will be set if the Open operation failed. */
7112                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7113                     6 * NFSX_UNSIGNED);
7114                 op->nfso_stateid.seqid = *tl++;
7115                 op->nfso_stateid.other[0] = *tl++;
7116                 op->nfso_stateid.other[1] = *tl++;
7117                 op->nfso_stateid.other[2] = *tl;
7118                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
7119                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7120                 if (error != 0)
7121                         goto nfsmout;
7122                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7123                 deleg = fxdr_unsigned(u_int32_t, *tl);
7124                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
7125                     deleg == NFSV4OPEN_DELEGATEWRITE) {
7126                         if (!(op->nfso_own->nfsow_clp->nfsc_flags &
7127                               NFSCLFLAGS_FIRSTDELEG))
7128                                 op->nfso_own->nfsow_clp->nfsc_flags |=
7129                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
7130                         ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
7131                             M_NFSCLDELEG, M_WAITOK);
7132                         LIST_INIT(&ndp->nfsdl_owner);
7133                         LIST_INIT(&ndp->nfsdl_lock);
7134                         ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
7135                         ndp->nfsdl_fhlen = newfhlen;
7136                         NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
7137                         newnfs_copyincred(cred, &ndp->nfsdl_cred);
7138                         nfscl_lockinit(&ndp->nfsdl_rwlock);
7139                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7140                             NFSX_UNSIGNED);
7141                         ndp->nfsdl_stateid.seqid = *tl++;
7142                         ndp->nfsdl_stateid.other[0] = *tl++;
7143                         ndp->nfsdl_stateid.other[1] = *tl++;
7144                         ndp->nfsdl_stateid.other[2] = *tl++;
7145                         ret = fxdr_unsigned(int, *tl);
7146                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
7147                                 ndp->nfsdl_flags = NFSCLDL_WRITE;
7148                                 /*
7149                                  * Indicates how much the file can grow.
7150                                  */
7151                                 NFSM_DISSECT(tl, u_int32_t *,
7152                                     3 * NFSX_UNSIGNED);
7153                                 limitby = fxdr_unsigned(int, *tl++);
7154                                 switch (limitby) {
7155                                 case NFSV4OPEN_LIMITSIZE:
7156                                         ndp->nfsdl_sizelimit = fxdr_hyper(tl);
7157                                         break;
7158                                 case NFSV4OPEN_LIMITBLOCKS:
7159                                         ndp->nfsdl_sizelimit =
7160                                             fxdr_unsigned(u_int64_t, *tl++);
7161                                         ndp->nfsdl_sizelimit *=
7162                                             fxdr_unsigned(u_int64_t, *tl);
7163                                         break;
7164                                 default:
7165                                         error = NFSERR_BADXDR;
7166                                         goto nfsmout;
7167                                 };
7168                         } else
7169                                 ndp->nfsdl_flags = NFSCLDL_READ;
7170                         if (ret != 0)
7171                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
7172                         error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
7173                             &acesize, p);
7174                         if (error != 0)
7175                                 goto nfsmout;
7176                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
7177                         error = NFSERR_BADXDR;
7178                         goto nfsmout;
7179                 }
7180                 if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
7181                     nfscl_assumeposixlocks)
7182                         op->nfso_posixlock = 1;
7183                 else
7184                         op->nfso_posixlock = 0;
7185                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7186                 /* If the 2nd element == NFS_OK, the Getattr succeeded. */
7187                 if (*++tl == 0) {
7188                         error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
7189                             NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
7190                             NULL, NULL, NULL, p, cred);
7191                         if (error != 0)
7192                                 goto nfsmout;
7193                         if (ndp != NULL) {
7194                                 ndp->nfsdl_change = nfsva.na_filerev;
7195                                 ndp->nfsdl_modtime = nfsva.na_mtime;
7196                                 ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7197                                 *dpp = ndp;
7198                                 ndp = NULL;
7199                         }
7200                         /*
7201                          * At this point, the Open has succeeded, so set
7202                          * nd_repstat = NFS_OK.  If the Layoutget failed,
7203                          * this function just won't return a layout.
7204                          */
7205                         if (nd->nd_repstat == 0) {
7206                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7207                                 *laystatp = fxdr_unsigned(int, *++tl);
7208                                 if (*laystatp == 0) {
7209                                         error = nfsrv_parselayoutget(nd,
7210                                             stateidp, retonclosep, flhp);
7211                                         if (error != 0)
7212                                                 *laystatp = error;
7213                                 }
7214                         } else
7215                                 nd->nd_repstat = 0;     /* Return 0 for Open. */
7216                 }
7217         }
7218         if (nd->nd_repstat != 0 && error == 0)
7219                 error = nd->nd_repstat;
7220 nfsmout:
7221         free(ndp, M_NFSCLDELEG);
7222         mbuf_freem(nd->nd_mrep);
7223         return (error);
7224 }
7225
7226 /*
7227  * Similar to nfsrpc_createv4(), but also does the LayoutGet operation.
7228  * Used only for mounts with pNFS enabled.
7229  */
7230 static int
7231 nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7232     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7233     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7234     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7235     int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp,
7236     int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
7237     struct nfsclflayouthead *flhp, int *laystatp)
7238 {
7239         uint32_t *tl;
7240         int error = 0, deleg, newone, ret, acesize, limitby;
7241         struct nfsrv_descript nfsd, *nd = &nfsd;
7242         struct nfsclopen *op;
7243         struct nfscldeleg *dp = NULL;
7244         struct nfsnode *np;
7245         struct nfsfh *nfhp;
7246         struct nfsclsession *tsep;
7247         nfsattrbit_t attrbits;
7248         nfsv4stateid_t stateid;
7249         struct nfsmount *nmp;
7250
7251         nmp = VFSTONFS(dvp->v_mount);
7252         np = VTONFS(dvp);
7253         *laystatp = ENXIO;
7254         *unlockedp = 0;
7255         *nfhpp = NULL;
7256         *dpp = NULL;
7257         *attrflagp = 0;
7258         *dattrflagp = 0;
7259         if (namelen > NFS_MAXNAMLEN)
7260                 return (ENAMETOOLONG);
7261         NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp);
7262         /*
7263          * For V4, this is actually an Open op.
7264          */
7265         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
7266         *tl++ = txdr_unsigned(owp->nfsow_seqid);
7267         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
7268             NFSV4OPEN_ACCESSREAD);
7269         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
7270         tsep = nfsmnt_mdssession(nmp);
7271         *tl++ = tsep->nfsess_clientid.lval[0];
7272         *tl = tsep->nfsess_clientid.lval[1];
7273         nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7274         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7275         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
7276         if ((fmode & O_EXCL) != 0) {
7277                 if (NFSHASSESSPERSIST(nmp)) {
7278                         /* Use GUARDED for persistent sessions. */
7279                         *tl = txdr_unsigned(NFSCREATE_GUARDED);
7280                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
7281                 } else {
7282                         /* Otherwise, use EXCLUSIVE4_1. */
7283                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
7284                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
7285                         *tl++ = cverf.lval[0];
7286                         *tl = cverf.lval[1];
7287                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
7288                 }
7289         } else {
7290                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
7291                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
7292         }
7293         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7294         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7295         nfsm_strtom(nd, name, namelen);
7296         /* Get the new file's handle and attributes, plus save the FH. */
7297         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
7298         *tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
7299         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
7300         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7301         NFSGETATTR_ATTRBIT(&attrbits);
7302         nfsrv_putattrbit(nd, &attrbits);
7303         /* Get the directory's post-op attributes. */
7304         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7305         *tl = txdr_unsigned(NFSV4OP_PUTFH);
7306         nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
7307         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7308         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7309         nfsrv_putattrbit(nd, &attrbits);
7310         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7311         *tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
7312         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7313         nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
7314             layouttype, layoutlen, usecurstateid);
7315         error = nfscl_request(nd, dvp, p, cred, dstuff);
7316         if (error != 0)
7317                 return (error);
7318         NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
7319             error);
7320         if (nd->nd_repstat != 0)
7321                 *laystatp = nd->nd_repstat;
7322         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
7323         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7324                 NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
7325                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7326                     6 * NFSX_UNSIGNED);
7327                 stateid.seqid = *tl++;
7328                 stateid.other[0] = *tl++;
7329                 stateid.other[1] = *tl++;
7330                 stateid.other[2] = *tl;
7331                 nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7332                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7333                 deleg = fxdr_unsigned(int, *tl);
7334                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
7335                     deleg == NFSV4OPEN_DELEGATEWRITE) {
7336                         if (!(owp->nfsow_clp->nfsc_flags &
7337                               NFSCLFLAGS_FIRSTDELEG))
7338                                 owp->nfsow_clp->nfsc_flags |=
7339                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
7340                         dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
7341                             M_NFSCLDELEG, M_WAITOK);
7342                         LIST_INIT(&dp->nfsdl_owner);
7343                         LIST_INIT(&dp->nfsdl_lock);
7344                         dp->nfsdl_clp = owp->nfsow_clp;
7345                         newnfs_copyincred(cred, &dp->nfsdl_cred);
7346                         nfscl_lockinit(&dp->nfsdl_rwlock);
7347                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7348                             NFSX_UNSIGNED);
7349                         dp->nfsdl_stateid.seqid = *tl++;
7350                         dp->nfsdl_stateid.other[0] = *tl++;
7351                         dp->nfsdl_stateid.other[1] = *tl++;
7352                         dp->nfsdl_stateid.other[2] = *tl++;
7353                         ret = fxdr_unsigned(int, *tl);
7354                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
7355                                 dp->nfsdl_flags = NFSCLDL_WRITE;
7356                                 /*
7357                                  * Indicates how much the file can grow.
7358                                  */
7359                                 NFSM_DISSECT(tl, u_int32_t *,
7360                                     3 * NFSX_UNSIGNED);
7361                                 limitby = fxdr_unsigned(int, *tl++);
7362                                 switch (limitby) {
7363                                 case NFSV4OPEN_LIMITSIZE:
7364                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
7365                                         break;
7366                                 case NFSV4OPEN_LIMITBLOCKS:
7367                                         dp->nfsdl_sizelimit =
7368                                             fxdr_unsigned(u_int64_t, *tl++);
7369                                         dp->nfsdl_sizelimit *=
7370                                             fxdr_unsigned(u_int64_t, *tl);
7371                                         break;
7372                                 default:
7373                                         error = NFSERR_BADXDR;
7374                                         goto nfsmout;
7375                                 };
7376                         } else {
7377                                 dp->nfsdl_flags = NFSCLDL_READ;
7378                         }
7379                         if (ret != 0)
7380                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
7381                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
7382                             &acesize, p);
7383                         if (error != 0)
7384                                 goto nfsmout;
7385                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
7386                         error = NFSERR_BADXDR;
7387                         goto nfsmout;
7388                 }
7389
7390                 /* Now, we should have the status for the SaveFH. */
7391                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7392                 if (*++tl == 0) {
7393                         NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
7394                         /*
7395                          * Now, process the GetFH and Getattr for the newly
7396                          * created file. nfscl_mtofh() will set
7397                          * ND_NOMOREDATA if these weren't successful.
7398                          */
7399                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
7400                         NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
7401                         if (error != 0)
7402                                 goto nfsmout;
7403                 } else
7404                         nd->nd_flag |= ND_NOMOREDATA;
7405                 /* Now we have the PutFH and Getattr for the directory. */
7406                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7407                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7408                         if (*++tl != 0)
7409                                 nd->nd_flag |= ND_NOMOREDATA;
7410                         else {
7411                                 NFSM_DISSECT(tl, uint32_t *, 2 *
7412                                     NFSX_UNSIGNED);
7413                                 if (*++tl != 0)
7414                                         nd->nd_flag |= ND_NOMOREDATA;
7415                         }
7416                 }
7417                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7418                         /* Load the directory attributes. */
7419                         error = nfsm_loadattr(nd, dnap);
7420                         NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
7421                         if (error != 0)
7422                                 goto nfsmout;
7423                         *dattrflagp = 1;
7424                         if (dp != NULL && *attrflagp != 0) {
7425                                 dp->nfsdl_change = nnap->na_filerev;
7426                                 dp->nfsdl_modtime = nnap->na_mtime;
7427                                 dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7428                         }
7429                         /*
7430                          * We can now complete the Open state.
7431                          */
7432                         nfhp = *nfhpp;
7433                         if (dp != NULL) {
7434                                 dp->nfsdl_fhlen = nfhp->nfh_len;
7435                                 NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
7436                                     nfhp->nfh_len);
7437                         }
7438                         /*
7439                          * Get an Open structure that will be
7440                          * attached to the OpenOwner, acquired already.
7441                          */
7442                         error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
7443                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
7444                             cred, p, NULL, &op, &newone, NULL, 0);
7445                         if (error != 0)
7446                                 goto nfsmout;
7447                         op->nfso_stateid = stateid;
7448                         newnfs_copyincred(cred, &op->nfso_cred);
7449         
7450                         nfscl_openrelease(nmp, op, error, newone);
7451                         *unlockedp = 1;
7452
7453                         /* Now, handle the RestoreFH and LayoutGet. */
7454                         if (nd->nd_repstat == 0) {
7455                                 NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
7456                                 *laystatp = fxdr_unsigned(int, *(tl + 3));
7457                                 if (*laystatp == 0) {
7458                                         error = nfsrv_parselayoutget(nd,
7459                                             stateidp, retonclosep, flhp);
7460                                         if (error != 0)
7461                                                 *laystatp = error;
7462                                 }
7463                                 NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
7464                                     error);
7465                         } else
7466                                 nd->nd_repstat = 0;
7467                 }
7468         }
7469         if (nd->nd_repstat != 0 && error == 0)
7470                 error = nd->nd_repstat;
7471         if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION)
7472                 nfscl_initiate_recovery(owp->nfsow_clp);
7473 nfsmout:
7474         NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
7475         if (error == 0)
7476                 *dpp = dp;
7477         else
7478                 free(dp, M_NFSCLDELEG);
7479         mbuf_freem(nd->nd_mrep);
7480         return (error);
7481 }
7482
7483 /*
7484  * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
7485  */
7486 static int
7487 nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7488     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7489     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7490     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7491     int *dattrflagp, void *dstuff, int *unlockedp)
7492 {
7493         struct nfscllayout *lyp;
7494         struct nfsclflayouthead flh;
7495         struct nfsfh *nfhp;
7496         struct nfsclsession *tsep;
7497         struct nfsmount *nmp;
7498         nfsv4stateid_t stateid;
7499         int error, layoutlen, layouttype, retonclose, laystat;
7500
7501         error = 0;
7502         nmp = VFSTONFS(dvp->v_mount);
7503         if (NFSHASFLEXFILE(nmp))
7504                 layouttype = NFSLAYOUT_FLEXFILE;
7505         else
7506                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
7507         LIST_INIT(&flh);
7508         tsep = nfsmnt_mdssession(nmp);
7509         layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
7510         error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
7511             owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
7512             dstuff, unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
7513             &flh, &laystat);
7514         NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
7515             laystat, error);
7516         lyp = NULL;
7517         if (laystat == 0) {
7518                 nfhp = *nfhpp;
7519                 laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
7520                     nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
7521                     layouttype, laystat, NULL, cred, p);
7522         } else
7523                 laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
7524                     retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL,
7525                     cred, p);
7526         if (laystat == 0)
7527                 nfscl_rellayout(lyp, 0);
7528         return (error);
7529 }
7530
7531 /*
7532  * Process the results of a layoutget() operation.
7533  */
7534 static int
7535 nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
7536     int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
7537     struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
7538     int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
7539 {
7540         struct nfsclflayout *tflp;
7541         struct nfscldevinfo *dip;
7542         uint8_t *dev;
7543
7544         if (laystat == NFSERR_UNKNLAYOUTTYPE) {
7545                 NFSLOCKMNT(nmp);
7546                 if (!NFSHASFLEXFILE(nmp)) {
7547                         /* Switch to using Flex File Layout. */
7548                         nmp->nm_state |= NFSSTA_FLEXFILE;
7549                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
7550                         /* Disable pNFS. */
7551                         NFSCL_DEBUG(1, "disable PNFS\n");
7552                         nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
7553                 }
7554                 NFSUNLOCKMNT(nmp);
7555         }
7556         if (laystat == 0) {
7557                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
7558                 LIST_FOREACH(tflp, flhp, nfsfl_list) {
7559                         laystat = nfscl_adddevinfo(nmp, NULL, tflp);
7560                         NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
7561                         if (laystat != 0) {
7562                                 if (layouttype == NFSLAYOUT_FLEXFILE)
7563                                         dev = tflp->nfsfl_ffm[0].dev;
7564                                 else
7565                                         dev = tflp->nfsfl_dev;
7566                                 laystat = nfsrpc_getdeviceinfo(nmp, dev,
7567                                     layouttype, notifybit, &dip, cred, p);
7568                                 NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
7569                                     laystat);
7570                                 if (laystat != 0)
7571                                         break;
7572                                 laystat = nfscl_adddevinfo(nmp, dip, tflp);
7573                                 if (laystat != 0)
7574                                         printf("getlayout: cannot add\n");
7575                         }
7576                 }
7577         }
7578         if (laystat == 0) {
7579                 /*
7580                  * nfscl_layout() always returns with the nfsly_lock
7581                  * set to a refcnt (shared lock).
7582                  * Passing in dvp is sufficient, since it is only used to
7583                  * get the fsid for the file system.
7584                  */
7585                 laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
7586                     layouttype, retonclose, flhp, lypp, cred, p);
7587                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
7588                     laystat);
7589                 if (laystat == 0 && islockedp != NULL)
7590                         *islockedp = 1;
7591         }
7592         return (laystat);
7593 }
7594