]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/nfsclient/nfs_clrpcops.c
Make the pNFS NFSv4.1 client return a Flexible File layout upon error.
[FreeBSD/FreeBSD.git] / sys / fs / nfsclient / nfs_clrpcops.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 /*
40  * Rpc op calls, generally called from the vnode op calls or through the
41  * buffer cache, for NFS v2, 3 and 4.
42  * These do not normally make any changes to vnode arguments or use
43  * structures that might change between the VFS variants. The returned
44  * arguments are all at the end, after the NFSPROC_T *p one.
45  */
46
47 #ifndef APPLEKEXT
48 #include "opt_inet6.h"
49
50 #include <fs/nfs/nfsport.h>
51 #include <sys/sysctl.h>
52 #include <sys/taskqueue.h>
53
SYSCTL_DECL(_vfs_nfs);

/*
 * Read/write integer sysctl "ignore_eexist" under the _vfs_nfs node,
 * backed by nfsignore_eexist.  It defaults to 0; per its description it
 * makes the client ignore EEXIST replies for mkdir/symlink.
 */
static int      nfsignore_eexist = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
    &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");

/*
 * Global variables
 *
 * The "extern" ones are defined in other files; the rest are defined
 * (and given their initial values) here.
 */
/* Must be non-zero, along with nfscl_enablecallb, for nfsrpc_open() to ask for a layout. */
extern int nfs_numnfscbd;
extern struct timeval nfsboottime;
extern u_int32_t newnfs_false, newnfs_true;
extern nfstype nfsv34_type[9];
extern int nfsrv_useacl;
extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
extern int nfscl_debuglevel;
extern int nfs_pnfsiothreads;
NFSCLSTATEMUTEX;
int nfstest_outofseq = 0;
/* When non-zero, nfsrpc_openrpc() marks every Open as having POSIX lock semantics. */
int nfscl_assumeposixlocks = 1;
/* When zero, nfsrpc_open() never tries the combined open + layout RPC. */
int nfscl_enablecallb = 0;
short nfsv4_cbport = NFSV4_CBPORT;
int nfstest_openallsetattr = 0;
77 #endif  /* !APPLEKEXT */
78
/* Byte offset of d_name within struct dirent, i.e. the size of the fields that precede the name. */
#define DIRHDSIZ        offsetof(struct dirent, d_name)
80
/*
 * Result of nfscl_getsameserver().  Exactly one of these is returned:
 *   NFSDSP_USETHISSESSION - the session of the returned dsp is to be used
 *       for the DS.
 *   NFSDSP_SEQTHISSESSION - a new session is needed; use the
 *       nfsclds_sequence field of the returned dsp for it.
 *   NFSDSP_NOTFOUND - no matching server was found.
 */
enum nfsclds_state {
        NFSDSP_USETHISSESSION,          /* 0 */
        NFSDSP_SEQTHISSESSION,          /* 1 */
        NFSDSP_NOTFOUND                 /* 2 */
};
93
/*
 * Do a write RPC on a DS data file, using this structure for the arguments,
 * so that this function can be executed by a separate kernel process.
 *
 * The members from vp through p appear in the same order as the parameters
 * of nfsrpc_writedsmir(), except that iomode and must_commit are held here
 * by value where that function takes "int *" arguments.  A pointer to this
 * structure is also accepted by nfsio_writedsmir(), nfsio_commitds() and
 * nfscl_dofflayoutio().
 */
struct nfsclwritedsdorpc {
        /* NOTE(review): done/inprog look like completion/in-progress flags - confirm against the users. */
        int                     done;
        int                     inprog;
        /* NOTE(review): presumably the task queued through nfs_pnfsio() - confirm. */
        struct task             tsk;
        struct vnode            *vp;
        int                     iomode;
        int                     must_commit;
        nfsv4stateid_t          *stateidp;
        struct nfsclds          *dsp;
        uint64_t                off;
        int                     len;
        struct nfsfh            *fhp;
        struct mbuf             *m;
        int                     vers;
        int                     minorvers;
        struct ucred            *cred;
        NFSPROC_T               *p;
        /* NOTE(review): presumably the error result of the RPC - confirm. */
        int                     err;
};
117
/*
 * Forward declarations of the helpers that are private to this file
 * (all "static").
 */
static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
    struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
    nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *);
static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
    struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
    void *);
static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
    nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
    struct nfsvattr *, struct nfsfh **, int *, int *, void *);
static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
    NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
    int *, void *, int *);
static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
    struct nfscllockowner *, u_int64_t, u_int64_t,
    u_int32_t, struct ucred *, NFSPROC_T *, int);
static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
    struct acl *, nfsv4stateid_t *, void *);
static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
    uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
    struct ucred *, NFSPROC_T *);
static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
    struct sockaddr_in6 *, sa_family_t, int, struct nfsclds **, NFSPROC_T *);
static void nfscl_initsessionslots(struct nfsclsession *);
static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
    nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
    struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
    NFSPROC_T *);
static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
    nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
    struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
    struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
static struct mbuf *nfsm_copym(struct mbuf *, int, int);
static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
    struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
    struct ucred *, NFSPROC_T *);
static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
    nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
    struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
    struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
    struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
    struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
    struct ucred *, NFSPROC_T *);
static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
    struct nfsclds *, struct nfsclds **);
static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
    struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
    NFSPROC_T *);
static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
    struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
    uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
    NFSPROC_T *);
static int nfsrv_parselayoutget(struct nfsrv_descript *, nfsv4stateid_t *,
    int *, struct nfsclflayouthead *);
static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
    int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
    struct nfscldeleg **, struct ucred *, NFSPROC_T *);
static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
    struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
    struct nfsfh **, int *, int *, void *, int *);
static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
    int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
    struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
    struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
    struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
    struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *,
    int, int, int, int *, struct nfsclflayouthead *, int *);
static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
    uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
    struct nfsclflayouthead *, struct ucred *, NFSPROC_T *, void *);
static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
    int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
    struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);

/* Not static: declared here with external linkage; its definition is not in this part of the file. */
int nfs_pnfsio(task_fn_t *, void *);
201
202 /*
203  * nfs null call from vfs.
204  */
205 APPLESTATIC int
206 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
207 {
208         int error;
209         struct nfsrv_descript nfsd, *nd = &nfsd;
210         
211         NFSCL_REQSTART(nd, NFSPROC_NULL, vp);
212         error = nfscl_request(nd, vp, p, cred, NULL);
213         if (nd->nd_repstat && !error)
214                 error = nd->nd_repstat;
215         mbuf_freem(nd->nd_mrep);
216         return (error);
217 }
218
219 /*
220  * nfs access rpc op.
221  * For nfs version 3 and 4, use the access rpc to check accessibility. If file
222  * modes are changed on the server, accesses might still fail later.
223  */
224 APPLESTATIC int
225 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
226     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
227 {
228         int error;
229         u_int32_t mode, rmode;
230
231         if (acmode & VREAD)
232                 mode = NFSACCESS_READ;
233         else
234                 mode = 0;
235         if (vnode_vtype(vp) == VDIR) {
236                 if (acmode & VWRITE)
237                         mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
238                                  NFSACCESS_DELETE);
239                 if (acmode & VEXEC)
240                         mode |= NFSACCESS_LOOKUP;
241         } else {
242                 if (acmode & VWRITE)
243                         mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
244                 if (acmode & VEXEC)
245                         mode |= NFSACCESS_EXECUTE;
246         }
247
248         /*
249          * Now, just call nfsrpc_accessrpc() to do the actual RPC.
250          */
251         error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode,
252             NULL);
253
254         /*
255          * The NFS V3 spec does not clarify whether or not
256          * the returned access bits can be a superset of
257          * the ones requested, so...
258          */
259         if (!error && (rmode & mode) != mode)
260                 error = EACCES;
261         return (error);
262 }
263
264 /*
265  * The actual rpc, separated out for Darwin.
266  */
267 APPLESTATIC int
268 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
269     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep,
270     void *stuff)
271 {
272         u_int32_t *tl;
273         u_int32_t supported, rmode;
274         int error;
275         struct nfsrv_descript nfsd, *nd = &nfsd;
276         nfsattrbit_t attrbits;
277
278         *attrflagp = 0;
279         supported = mode;
280         NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp);
281         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
282         *tl = txdr_unsigned(mode);
283         if (nd->nd_flag & ND_NFSV4) {
284                 /*
285                  * And do a Getattr op.
286                  */
287                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
288                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
289                 NFSGETATTR_ATTRBIT(&attrbits);
290                 (void) nfsrv_putattrbit(nd, &attrbits);
291         }
292         error = nfscl_request(nd, vp, p, cred, stuff);
293         if (error)
294                 return (error);
295         if (nd->nd_flag & ND_NFSV3) {
296                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
297                 if (error)
298                         goto nfsmout;
299         }
300         if (!nd->nd_repstat) {
301                 if (nd->nd_flag & ND_NFSV4) {
302                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
303                         supported = fxdr_unsigned(u_int32_t, *tl++);
304                 } else {
305                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
306                 }
307                 rmode = fxdr_unsigned(u_int32_t, *tl);
308                 if (nd->nd_flag & ND_NFSV4)
309                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
310
311                 /*
312                  * It's not obvious what should be done about
313                  * unsupported access modes. For now, be paranoid
314                  * and clear the unsupported ones.
315                  */
316                 rmode &= supported;
317                 *rmodep = rmode;
318         } else
319                 error = nd->nd_repstat;
320 nfsmout:
321         mbuf_freem(nd->nd_mrep);
322         return (error);
323 }
324
/*
 * nfs open rpc
 *
 * Open the regular file vp for the access requested in amode (FREAD
 * and/or FWRITE); any other vnode type returns 0 without doing anything.
 * nfscl_open() decides what is needed: an Open RPC (NFSCLOPEN_DOOPEN),
 * just recording credentials (NFSCLOPEN_SETCRED), or nothing further.
 * The whole attempt is repeated while the result is one of the errors
 * tested at the bottom of the loop; expired/bad-stateid failures are
 * retried only until retrycnt reaches 4.
 * Returns 0 on success, otherwise an error; EIO is returned when those
 * retries are used up or when the node has no n_v4 data.
 */
APPLESTATIC int
nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
{
        struct nfsclopen *op;
        struct nfscldeleg *dp;
        struct nfsfh *nfhp;
        struct nfsnode *np = VTONFS(vp);
        struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
        u_int32_t mode, clidrev;
        int ret, newone, error, expireret = 0, retrycnt;

        /*
         * For NFSv4, Open Ops are only done on Regular Files.
         */
        if (vnode_vtype(vp) != VREG)
                return (0);
        /* Map the FREAD/FWRITE flags onto the NFSv4 share access bits. */
        mode = 0;
        if (amode & FREAD)
                mode |= NFSV4OPEN_ACCESSREAD;
        if (amode & FWRITE)
                mode |= NFSV4OPEN_ACCESSWRITE;
        nfhp = np->n_fhp;

        retrycnt = 0;
        /* Debugging output, compiled only when "notdef" is defined. */
#ifdef notdef
{ char name[100]; int namel;
namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
bcopy(NFS4NODENAME(np->n_v4), name, namel);
name[namel] = '\0';
printf("rpcopen p=0x%x name=%s",p->p_pid,name);
if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
else printf(" fhl=0\n");
}
#endif
        do {
            dp = NULL;
            error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
                cred, p, NULL, &op, &newone, &ret, 1);
            if (error) {
                return (error);
            }
            /*
             * Remember the client id revision seen for this attempt; it
             * is handed to nfscl_hasexpired() below. 0 means there is no
             * client structure, which also disables the expiry retry.
             */
            if (nmp->nm_clp != NULL)
                clidrev = nmp->nm_clp->nfsc_clientidrev;
            else
                clidrev = 0;
            if (ret == NFSCLOPEN_DOOPEN) {
                if (np->n_v4 != NULL) {
                        /*
                         * For the first attempt, try and get a layout, if
                         * pNFS is enabled for the mount.
                         * The plain Open RPC is used instead when pNFS is
                         * not enabled for the mount, callbacks are off, no
                         * nfscbd is counted, the node is flagged NNOLAYOUT,
                         * or this is a retry.
                         */
                        if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
                            nfs_numnfscbd == 0 ||
                            (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
                                error = nfsrpc_openrpc(nmp, vp,
                                    np->n_v4->n4_data,
                                    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
                                    np->n_fhp->nfh_len, mode, op,
                                    NFS4NODENAME(np->n_v4),
                                    np->n_v4->n4_namelen,
                                    &dp, 0, 0x0, cred, p, 0, 0);
                        else
                                error = nfsrpc_getopenlayout(nmp, vp,
                                    np->n_v4->n4_data,
                                    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
                                    np->n_fhp->nfh_len, mode, op,
                                    NFS4NODENAME(np->n_v4),
                                    np->n_v4->n4_namelen, &dp, cred, p);
                        /* A non-NULL dp means the server issued a delegation. */
                        if (dp != NULL) {
#ifdef APPLE
                                OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag);
#else
                                NFSLOCKNODE(np);
                                np->n_flag &= ~NDELEGMOD;
                                /*
                                 * Invalidate the attribute cache, so that
                                 * attributes that pre-date the issue of a
                                 * delegation are not cached, since the
                                 * cached attributes will remain valid while
                                 * the delegation is held.
                                 */
                                NFSINVALATTRCACHE(np);
                                NFSUNLOCKNODE(np);
#endif
                                (void) nfscl_deleg(nmp->nm_mountp,
                                    op->nfso_own->nfsow_clp,
                                    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
                        }
                } else {
                        /* Without n_v4 there is no name/parent handle to open with. */
                        error = EIO;
                }
                newnfs_copyincred(cred, &op->nfso_cred);
            } else if (ret == NFSCLOPEN_SETCRED)
                /*
                 * This is a new local open on a delegation. It needs
                 * to have credentials so that an open can be done
                 * against the server during recovery.
                 */
                newnfs_copyincred(cred, &op->nfso_cred);

            /*
             * nfso_opencnt is the count of how many VOP_OPEN()s have
             * been done on this Open successfully and a VOP_CLOSE()
             * is expected for each of these.
             * If error is non-zero, don't increment it, since the Open
             * hasn't succeeded yet.
             */
            if (!error)
                op->nfso_opencnt++;
            nfscl_openrelease(nmp, op, error, newone);
            /*
             * Errors in the first group are always retried, after a call
             * to nfs_catnap() (NOTE(review): presumably a short sleep -
             * confirm). Expired/bad stateid errors go through
             * nfscl_hasexpired() and count against retrycnt.
             */
            if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
                error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
                error == NFSERR_BADSESSION) {
                (void) nfs_catnap(PZERO, error, "nfs_open");
            } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
                && clidrev != 0) {
                expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
                retrycnt++;
            }
        } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
            error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
            error == NFSERR_BADSESSION ||
            ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
             expireret == 0 && clidrev != 0 && retrycnt < 4));
        /* Out of expiry retries: report a generic I/O error. */
        if (error && retrycnt >= 4)
                error = EIO;
        return (error);
}
456
/*
 * the actual open rpc
 *
 * Build and send an NFSv4 Open (no create) followed by a Getattr of the
 * change and modify-time attributes, then parse the reply into op and,
 * when the server grants one, a newly allocated delegation.
 *
 * nfhp/fhlen   - file handle the request is started with.
 * newfhp/newfhlen - file handle copied into a returned delegation and
 *                passed to nfsrpc_openconfirm().
 * mode         - share access bits, with the deny bits above NFSLCK_SHIFT.
 * name/namelen - component name, sent for every claim type except
 *                reclaim.
 * dpp          - in: a delegation to claim the open under, or NULL.
 *                out: the delegation granted by this Open, or NULL. It is
 *                only set non-NULL when 0 is returned.
 * reclaim/delegtype - non-zero reclaim sends CLAIMPREVIOUS with delegtype.
 * syscred      - non-zero sets ND_USEGSSNAME on the request.
 * recursed     - non-zero stops the function from re-issuing the Open to
 *                try for a delegation (used by its own recursive call).
 * Returns 0 or an error number (request error, NFSERR_BADXDR on an
 * unexpected reply, or the status from the reply).
 */
APPLESTATIC int
nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
    u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
    u_int8_t *name, int namelen, struct nfscldeleg **dpp,
    int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
    int syscred, int recursed)
{
        u_int32_t *tl;
        struct nfsrv_descript nfsd, *nd = &nfsd;
        struct nfscldeleg *dp, *ndp = NULL;
        struct nfsvattr nfsva;
        u_int32_t rflags, deleg;
        nfsattrbit_t attrbits;
        int error, ret, acesize, limitby;
        struct nfsclsession *tsep;

        /*
         * Take the caller's delegation as input and clear *dpp right
         * away, so that it stays NULL on every error return.
         */
        dp = *dpp;
        *dpp = NULL;
        nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
        /* Owner seqid, share access, share deny and the two clientid words. */
        NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
        *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
        *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
        *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
        tsep = nfsmnt_mdssession(nmp);
        *tl++ = tsep->nfsess_clientid.lval[0];
        *tl = tsep->nfsess_clientid.lval[1];
        (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
        /* Open type (never create here) and then the claim type. */
        NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
        *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
        if (reclaim) {
                *tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
                NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
                *tl = txdr_unsigned(delegtype);
        } else {
                if (dp != NULL) {
                        *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
                        NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
                        /* A zero seqid is sent when NFSHASNFSV4N() is true for the mount. */
                        if (NFSHASNFSV4N(nmp))
                                *tl++ = 0;
                        else
                                *tl++ = dp->nfsdl_stateid.seqid;
                        *tl++ = dp->nfsdl_stateid.other[0];
                        *tl++ = dp->nfsdl_stateid.other[1];
                        *tl = dp->nfsdl_stateid.other[2];
                } else {
                        *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
                }
                (void) nfsm_strtom(nd, name, namelen);
        }
        /* Getattr op asking only for the change and modify time attributes. */
        NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
        *tl = txdr_unsigned(NFSV4OP_GETATTR);
        NFSZERO_ATTRBIT(&attrbits);
        NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
        NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
        (void) nfsrv_putattrbit(nd, &attrbits);
        if (syscred)
                nd->nd_flag |= ND_USEGSSNAME;
        error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
            NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
        if (error)
                return (error);
        NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
        if (!nd->nd_repstat) {
                NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
                    6 * NFSX_UNSIGNED);
                op->nfso_stateid.seqid = *tl++;
                op->nfso_stateid.other[0] = *tl++;
                op->nfso_stateid.other[1] = *tl++;
                op->nfso_stateid.other[2] = *tl;
                /*
                 * tl was not advanced past other[2], so tl + 6 is the
                 * sixth word after the stateid; the five words in between
                 * are skipped.
                 */
                rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
                error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
                if (error)
                        goto nfsmout;
                NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
                deleg = fxdr_unsigned(u_int32_t, *tl);
                if (deleg == NFSV4OPEN_DELEGATEREAD ||
                    deleg == NFSV4OPEN_DELEGATEWRITE) {
                        /* Note on the client that the server hands out delegations. */
                        if (!(op->nfso_own->nfsow_clp->nfsc_flags &
                              NFSCLFLAGS_FIRSTDELEG))
                                op->nfso_own->nfsow_clp->nfsc_flags |=
                                  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
                        /*
                         * The extra newfhlen bytes hold the file handle
                         * copied into nfsdl_fh below. From here on ndp is
                         * released at nfsmout if an error is set.
                         */
                        ndp = malloc(
                            sizeof (struct nfscldeleg) + newfhlen,
                            M_NFSCLDELEG, M_WAITOK);
                        LIST_INIT(&ndp->nfsdl_owner);
                        LIST_INIT(&ndp->nfsdl_lock);
                        ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
                        ndp->nfsdl_fhlen = newfhlen;
                        NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
                        newnfs_copyincred(cred, &ndp->nfsdl_cred);
                        nfscl_lockinit(&ndp->nfsdl_rwlock);
                        NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
                            NFSX_UNSIGNED);
                        ndp->nfsdl_stateid.seqid = *tl++;
                        ndp->nfsdl_stateid.other[0] = *tl++;
                        ndp->nfsdl_stateid.other[1] = *tl++;
                        ndp->nfsdl_stateid.other[2] = *tl++;
                        /* ret temporarily holds the word after the stateid (recall flag). */
                        ret = fxdr_unsigned(int, *tl);
                        if (deleg == NFSV4OPEN_DELEGATEWRITE) {
                                ndp->nfsdl_flags = NFSCLDL_WRITE;
                                /*
                                 * Indicates how much the file can grow.
                                 */
                                NFSM_DISSECT(tl, u_int32_t *,
                                    3 * NFSX_UNSIGNED);
                                limitby = fxdr_unsigned(int, *tl++);
                                switch (limitby) {
                                case NFSV4OPEN_LIMITSIZE:
                                        ndp->nfsdl_sizelimit = fxdr_hyper(tl);
                                        break;
                                case NFSV4OPEN_LIMITBLOCKS:
                                        /* The limit is the product of the two words. */
                                        ndp->nfsdl_sizelimit =
                                            fxdr_unsigned(u_int64_t, *tl++);
                                        ndp->nfsdl_sizelimit *=
                                            fxdr_unsigned(u_int64_t, *tl);
                                        break;
                                default:
                                        error = NFSERR_BADXDR;
                                        goto nfsmout;
                                }
                        } else {
                                ndp->nfsdl_flags = NFSCLDL_READ;
                        }
                        if (ret)
                                ndp->nfsdl_flags |= NFSCLDL_RECALL;
                        error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
                            &acesize, p);
                        if (error)
                                goto nfsmout;
                } else if (deleg != NFSV4OPEN_DELEGATENONE) {
                        error = NFSERR_BADXDR;
                        goto nfsmout;
                }
                /* Step over two words, then load the Getattr results. */
                NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
                error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
                    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
                    NULL, NULL, NULL, p, cred);
                if (error)
                        goto nfsmout;
                if (ndp != NULL) {
                        ndp->nfsdl_change = nfsva.na_filerev;
                        ndp->nfsdl_modtime = nfsva.na_mtime;
                        ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
                }
                /* Confirm the open if the server asked for it, retrying on NFSERR_DELAY. */
                if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
                    do {
                        ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
                            cred, p);
                        if (ret == NFSERR_DELAY)
                            (void) nfs_catnap(PZERO, ret, "nfs_open");
                    } while (ret == NFSERR_DELAY);
                    error = ret;
                }
                if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
                    nfscl_assumeposixlocks)
                    op->nfso_posixlock = 1;
                else
                    op->nfso_posixlock = 0;

                /*
                 * If the server is handing out delegations, but we didn't
                 * get one because an OpenConfirm was required, try the
                 * Open again, to get a delegation. This is a harmless no-op,
                 * from a server's point of view.
                 * The nested call passes recursed == 1, so it cannot
                 * recurse any further.
                 */
                if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
                    (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
                    && !error && dp == NULL && ndp == NULL && !recursed) {
                    do {
                        ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
                            newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
                            cred, p, syscred, 1);
                        if (ret == NFSERR_DELAY)
                            (void) nfs_catnap(PZERO, ret, "nfs_open2");
                    } while (ret == NFSERR_DELAY);
                    if (ret) {
                        if (ndp != NULL) {
                                free(ndp, M_NFSCLDELEG);
                                ndp = NULL;
                        }
                        /*
                         * Only these failures of the second Open are
                         * passed on; any other one leaves the first,
                         * successful Open as the result.
                         */
                        if (ret == NFSERR_STALECLIENTID ||
                            ret == NFSERR_STALEDONTRECOVER ||
                            ret == NFSERR_BADSESSION)
                                error = ret;
                    }
                }
        }
        if (nd->nd_repstat != 0 && error == 0)
                error = nd->nd_repstat;
        if (error == NFSERR_STALECLIENTID)
                nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
nfsmout:
        /* Hand the new delegation to the caller on success, else release it. */
        if (!error)
                *dpp = ndp;
        else if (ndp != NULL)
                free(ndp, M_NFSCLDELEG);
        mbuf_freem(nd->nd_mrep);
        return (error);
}
659
660 /*
661  * open downgrade rpc
662  */
663 APPLESTATIC int
664 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
665     struct ucred *cred, NFSPROC_T *p)
666 {
667         u_int32_t *tl;
668         struct nfsrv_descript nfsd, *nd = &nfsd;
669         int error;
670
671         NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp);
672         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
673         if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp))))
674                 *tl++ = 0;
675         else
676                 *tl++ = op->nfso_stateid.seqid;
677         *tl++ = op->nfso_stateid.other[0];
678         *tl++ = op->nfso_stateid.other[1];
679         *tl++ = op->nfso_stateid.other[2];
680         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
681         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
682         *tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
683         error = nfscl_request(nd, vp, p, cred, NULL);
684         if (error)
685                 return (error);
686         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
687         if (!nd->nd_repstat) {
688                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
689                 op->nfso_stateid.seqid = *tl++;
690                 op->nfso_stateid.other[0] = *tl++;
691                 op->nfso_stateid.other[1] = *tl++;
692                 op->nfso_stateid.other[2] = *tl;
693         }
694         if (nd->nd_repstat && error == 0)
695                 error = nd->nd_repstat;
696         if (error == NFSERR_STALESTATEID)
697                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
698 nfsmout:
699         mbuf_freem(nd->nd_mrep);
700         return (error);
701 }
702
703 /*
704  * V4 Close operation.
705  */
706 APPLESTATIC int
707 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
708 {
709         struct nfsclclient *clp;
710         int error;
711
712         if (vnode_vtype(vp) != VREG)
713                 return (0);
714         if (doclose)
715                 error = nfscl_doclose(vp, &clp, p);
716         else
717                 error = nfscl_getclose(vp, &clp);
718         if (error)
719                 return (error);
720
721         nfscl_clientrelease(clp);
722         return (0);
723 }
724
725 /*
726  * Close the open.
727  */
728 APPLESTATIC void
729 nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p)
730 {
731         struct nfsrv_descript nfsd, *nd = &nfsd;
732         struct nfscllockowner *lp, *nlp;
733         struct nfscllock *lop, *nlop;
734         struct ucred *tcred;
735         u_int64_t off = 0, len = 0;
736         u_int32_t type = NFSV4LOCKT_READ;
737         int error, do_unlock, trycnt;
738
739         tcred = newnfs_getcred();
740         newnfs_copycred(&op->nfso_cred, tcred);
741         /*
742          * (Theoretically this could be done in the same
743          *  compound as the close, but having multiple
744          *  sequenced Ops in the same compound might be
745          *  too scary for some servers.)
746          */
747         if (op->nfso_posixlock) {
748                 off = 0;
749                 len = NFS64BITSSET;
750                 type = NFSV4LOCKT_READ;
751         }
752
753         /*
754          * Since this function is only called from VOP_INACTIVE(), no
755          * other thread will be manipulating this Open. As such, the
756          * lock lists are not being changed by other threads, so it should
757          * be safe to do this without locking.
758          */
759         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
760                 do_unlock = 1;
761                 LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
762                         if (op->nfso_posixlock == 0) {
763                                 off = lop->nfslo_first;
764                                 len = lop->nfslo_end - lop->nfslo_first;
765                                 if (lop->nfslo_type == F_WRLCK)
766                                         type = NFSV4LOCKT_WRITE;
767                                 else
768                                         type = NFSV4LOCKT_READ;
769                         }
770                         if (do_unlock) {
771                                 trycnt = 0;
772                                 do {
773                                         error = nfsrpc_locku(nd, nmp, lp, off,
774                                             len, type, tcred, p, 0);
775                                         if ((nd->nd_repstat == NFSERR_GRACE ||
776                                             nd->nd_repstat == NFSERR_DELAY) &&
777                                             error == 0)
778                                                 (void) nfs_catnap(PZERO,
779                                                     (int)nd->nd_repstat,
780                                                     "nfs_close");
781                                 } while ((nd->nd_repstat == NFSERR_GRACE ||
782                                     nd->nd_repstat == NFSERR_DELAY) &&
783                                     error == 0 && trycnt++ < 5);
784                                 if (op->nfso_posixlock)
785                                         do_unlock = 0;
786                         }
787                         nfscl_freelock(lop, 0);
788                 }
789                 /*
790                  * Do a ReleaseLockOwner.
791                  * The lock owner name nfsl_owner may be used by other opens for
792                  * other files but the lock_owner4 name that nfsrpc_rellockown()
793                  * puts on the wire has the file handle for this file appended
794                  * to it, so it can be done now.
795                  */
796                 (void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
797                     lp->nfsl_open->nfso_fhlen, tcred, p);
798         }
799
800         /*
801          * There could be other Opens for different files on the same
802          * OpenOwner, so locking is required.
803          */
804         NFSLOCKCLSTATE();
805         nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
806         NFSUNLOCKCLSTATE();
807         do {
808                 error = nfscl_tryclose(op, tcred, nmp, p);
809                 if (error == NFSERR_GRACE)
810                         (void) nfs_catnap(PZERO, error, "nfs_close");
811         } while (error == NFSERR_GRACE);
812         NFSLOCKCLSTATE();
813         nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
814
815         LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
816                 nfscl_freelockowner(lp, 0);
817         nfscl_freeopen(op, 0);
818         NFSUNLOCKCLSTATE();
819         NFSFREECRED(tcred);
820 }
821
822 /*
823  * The actual Close RPC.
824  */
825 APPLESTATIC int
826 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
827     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
828     int syscred)
829 {
830         u_int32_t *tl;
831         int error;
832
833         nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
834             op->nfso_fhlen, NULL, NULL, 0, 0);
835         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
836         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
837         if (NFSHASNFSV4N(nmp))
838                 *tl++ = 0;
839         else
840                 *tl++ = op->nfso_stateid.seqid;
841         *tl++ = op->nfso_stateid.other[0];
842         *tl++ = op->nfso_stateid.other[1];
843         *tl = op->nfso_stateid.other[2];
844         if (syscred)
845                 nd->nd_flag |= ND_USEGSSNAME;
846         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
847             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
848         if (error)
849                 return (error);
850         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
851         if (nd->nd_repstat == 0)
852                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
853         error = nd->nd_repstat;
854         if (error == NFSERR_STALESTATEID)
855                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
856 nfsmout:
857         mbuf_freem(nd->nd_mrep);
858         return (error);
859 }
860
861 /*
862  * V4 Open Confirm RPC.
863  */
864 APPLESTATIC int
865 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
866     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
867 {
868         u_int32_t *tl;
869         struct nfsrv_descript nfsd, *nd = &nfsd;
870         struct nfsmount *nmp;
871         int error;
872
873         nmp = VFSTONFS(vnode_mount(vp));
874         if (NFSHASNFSV4N(nmp))
875                 return (0);             /* No confirmation for NFSv4.1. */
876         nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL,
877             0, 0);
878         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
879         *tl++ = op->nfso_stateid.seqid;
880         *tl++ = op->nfso_stateid.other[0];
881         *tl++ = op->nfso_stateid.other[1];
882         *tl++ = op->nfso_stateid.other[2];
883         *tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
884         error = nfscl_request(nd, vp, p, cred, NULL);
885         if (error)
886                 return (error);
887         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
888         if (!nd->nd_repstat) {
889                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
890                 op->nfso_stateid.seqid = *tl++;
891                 op->nfso_stateid.other[0] = *tl++;
892                 op->nfso_stateid.other[1] = *tl++;
893                 op->nfso_stateid.other[2] = *tl;
894         }
895         error = nd->nd_repstat;
896         if (error == NFSERR_STALESTATEID)
897                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
898 nfsmout:
899         mbuf_freem(nd->nd_mrep);
900         return (error);
901 }
902
903 /*
904  * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
905  * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
906  */
907 APPLESTATIC int
908 nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
909     struct ucred *cred, NFSPROC_T *p)
910 {
911         u_int32_t *tl;
912         struct nfsrv_descript nfsd;
913         struct nfsrv_descript *nd = &nfsd;
914         nfsattrbit_t attrbits;
915         u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
916         u_short port;
917         int error, isinet6 = 0, callblen;
918         nfsquad_t confirm;
919         u_int32_t lease;
920         static u_int32_t rev = 0;
921         struct nfsclds *dsp;
922         struct in6_addr a6;
923         struct nfsclsession *tsep;
924
925         if (nfsboottime.tv_sec == 0)
926                 NFSSETBOOTTIME(nfsboottime);
927         clp->nfsc_rev = rev++;
928         if (NFSHASNFSV4N(nmp)) {
929                 /*
930                  * Either there was no previous session or the
931                  * previous session has failed, so...
932                  * do an ExchangeID followed by the CreateSession.
933                  */
934                 error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq,
935                     NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p);
936                 NFSCL_DEBUG(1, "aft exch=%d\n", error);
937                 if (error == 0)
938                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
939                             &nmp->nm_sockreq,
940                             dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
941                 if (error == 0) {
942                         NFSLOCKMNT(nmp);
943                         /*
944                          * The old sessions cannot be safely free'd
945                          * here, since they may still be used by
946                          * in-progress RPCs.
947                          */
948                         tsep = NULL;
949                         if (TAILQ_FIRST(&nmp->nm_sess) != NULL)
950                                 tsep = NFSMNT_MDSSESSION(nmp);
951                         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
952                             nfsclds_list);
953                         /*
954                          * Wake up RPCs waiting for a slot on the
955                          * old session. These will then fail with
956                          * NFSERR_BADSESSION and be retried with the
957                          * new session by nfsv4_setsequence().
958                          * Also wakeup() processes waiting for the
959                          * new session.
960                          */
961                         if (tsep != NULL)
962                                 wakeup(&tsep->nfsess_slots);
963                         wakeup(&nmp->nm_sess);
964                         NFSUNLOCKMNT(nmp);
965                 } else
966                         nfscl_freenfsclds(dsp);
967                 NFSCL_DEBUG(1, "aft createsess=%d\n", error);
968                 if (error == 0 && reclaim == 0) {
969                         error = nfsrpc_reclaimcomplete(nmp, cred, p);
970                         NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
971                         if (error == NFSERR_COMPLETEALREADY ||
972                             error == NFSERR_NOTSUPP)
973                                 /* Ignore this error. */
974                                 error = 0;
975                 }
976                 return (error);
977         }
978
979         /*
980          * Allocate a single session structure for NFSv4.0, because some of
981          * the fields are used by NFSv4.0 although it doesn't do a session.
982          */
983         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
984         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
985         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
986         NFSLOCKMNT(nmp);
987         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
988         tsep = NFSMNT_MDSSESSION(nmp);
989         NFSUNLOCKMNT(nmp);
990
991         nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0);
992         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
993         *tl++ = txdr_unsigned(nfsboottime.tv_sec);
994         *tl = txdr_unsigned(clp->nfsc_rev);
995         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
996
997         /*
998          * set up the callback address
999          */
1000         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1001         *tl = txdr_unsigned(NFS_CALLBCKPROG);
1002         callblen = strlen(nfsv4_callbackaddr);
1003         if (callblen == 0)
1004                 cp = nfscl_getmyip(nmp, &a6, &isinet6);
1005         if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
1006             (callblen > 0 || cp != NULL)) {
1007                 port = htons(nfsv4_cbport);
1008                 cp2 = (u_int8_t *)&port;
1009 #ifdef INET6
1010                 if ((callblen > 0 &&
1011                      strchr(nfsv4_callbackaddr, ':')) || isinet6) {
1012                         char ip6buf[INET6_ADDRSTRLEN], *ip6add;
1013
1014                         (void) nfsm_strtom(nd, "tcp6", 4);
1015                         if (callblen == 0) {
1016                                 ip6_sprintf(ip6buf, (struct in6_addr *)cp);
1017                                 ip6add = ip6buf;
1018                         } else {
1019                                 ip6add = nfsv4_callbackaddr;
1020                         }
1021                         snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
1022                             ip6add, cp2[0], cp2[1]);
1023                 } else
1024 #endif
1025                 {
1026                         (void) nfsm_strtom(nd, "tcp", 3);
1027                         if (callblen == 0)
1028                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1029                                     "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
1030                                     cp[2], cp[3], cp2[0], cp2[1]);
1031                         else
1032                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1033                                     "%s.%d.%d", nfsv4_callbackaddr,
1034                                     cp2[0], cp2[1]);
1035                 }
1036                 (void) nfsm_strtom(nd, addr, strlen(addr));
1037         } else {
1038                 (void) nfsm_strtom(nd, "tcp", 3);
1039                 (void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
1040         }
1041         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1042         *tl = txdr_unsigned(clp->nfsc_cbident);
1043         nd->nd_flag |= ND_USEGSSNAME;
1044         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1045                 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1046         if (error)
1047                 return (error);
1048         if (nd->nd_repstat == 0) {
1049             NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1050             tsep->nfsess_clientid.lval[0] = *tl++;
1051             tsep->nfsess_clientid.lval[1] = *tl++;
1052             confirm.lval[0] = *tl++;
1053             confirm.lval[1] = *tl;
1054             mbuf_freem(nd->nd_mrep);
1055             nd->nd_mrep = NULL;
1056
1057             /*
1058              * and confirm it.
1059              */
1060             nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
1061                 NULL, 0, 0);
1062             NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1063             *tl++ = tsep->nfsess_clientid.lval[0];
1064             *tl++ = tsep->nfsess_clientid.lval[1];
1065             *tl++ = confirm.lval[0];
1066             *tl = confirm.lval[1];
1067             nd->nd_flag |= ND_USEGSSNAME;
1068             error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1069                 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1070             if (error)
1071                 return (error);
1072             mbuf_freem(nd->nd_mrep);
1073             nd->nd_mrep = NULL;
1074             if (nd->nd_repstat == 0) {
1075                 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh,
1076                     nmp->nm_fhsize, NULL, NULL, 0, 0);
1077                 NFSZERO_ATTRBIT(&attrbits);
1078                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1079                 (void) nfsrv_putattrbit(nd, &attrbits);
1080                 nd->nd_flag |= ND_USEGSSNAME;
1081                 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1082                     cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1083                 if (error)
1084                     return (error);
1085                 if (nd->nd_repstat == 0) {
1086                     error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL,
1087                         NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred);
1088                     if (error)
1089                         goto nfsmout;
1090                     clp->nfsc_renew = NFSCL_RENEW(lease);
1091                     clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1092                     clp->nfsc_clientidrev++;
1093                     if (clp->nfsc_clientidrev == 0)
1094                         clp->nfsc_clientidrev++;
1095                 }
1096             }
1097         }
1098         error = nd->nd_repstat;
1099 nfsmout:
1100         mbuf_freem(nd->nd_mrep);
1101         return (error);
1102 }
1103
1104 /*
1105  * nfs getattr call.
1106  */
1107 APPLESTATIC int
1108 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1109     struct nfsvattr *nap, void *stuff)
1110 {
1111         struct nfsrv_descript nfsd, *nd = &nfsd;
1112         int error;
1113         nfsattrbit_t attrbits;
1114         
1115         NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
1116         if (nd->nd_flag & ND_NFSV4) {
1117                 NFSGETATTR_ATTRBIT(&attrbits);
1118                 (void) nfsrv_putattrbit(nd, &attrbits);
1119         }
1120         error = nfscl_request(nd, vp, p, cred, stuff);
1121         if (error)
1122                 return (error);
1123         if (!nd->nd_repstat)
1124                 error = nfsm_loadattr(nd, nap);
1125         else
1126                 error = nd->nd_repstat;
1127         mbuf_freem(nd->nd_mrep);
1128         return (error);
1129 }
1130
1131 /*
1132  * nfs getattr call with non-vnode arguemnts.
1133  */
1134 APPLESTATIC int
1135 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1136     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1137     uint32_t *leasep)
1138 {
1139         struct nfsrv_descript nfsd, *nd = &nfsd;
1140         int error, vers = NFS_VER2;
1141         nfsattrbit_t attrbits;
1142         
1143         nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0);
1144         if (nd->nd_flag & ND_NFSV4) {
1145                 vers = NFS_VER4;
1146                 NFSGETATTR_ATTRBIT(&attrbits);
1147                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1148                 (void) nfsrv_putattrbit(nd, &attrbits);
1149         } else if (nd->nd_flag & ND_NFSV3) {
1150                 vers = NFS_VER3;
1151         }
1152         if (syscred)
1153                 nd->nd_flag |= ND_USEGSSNAME;
1154         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1155             NFS_PROG, vers, NULL, 1, xidp, NULL);
1156         if (error)
1157                 return (error);
1158         if (nd->nd_repstat == 0) {
1159                 if ((nd->nd_flag & ND_NFSV4) != 0)
1160                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1161                             NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1162                             NULL, NULL);
1163                 else
1164                         error = nfsm_loadattr(nd, nap);
1165         } else
1166                 error = nd->nd_repstat;
1167         mbuf_freem(nd->nd_mrep);
1168         return (error);
1169 }
1170
1171 /*
1172  * Do an nfs setattr operation.
1173  */
1174 APPLESTATIC int
1175 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1176     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
1177     void *stuff)
1178 {
1179         int error, expireret = 0, openerr, retrycnt;
1180         u_int32_t clidrev = 0, mode;
1181         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1182         struct nfsfh *nfhp;
1183         nfsv4stateid_t stateid;
1184         void *lckp;
1185
1186         if (nmp->nm_clp != NULL)
1187                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1188         if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1189                 mode = NFSV4OPEN_ACCESSWRITE;
1190         else
1191                 mode = NFSV4OPEN_ACCESSREAD;
1192         retrycnt = 0;
1193         do {
1194                 lckp = NULL;
1195                 openerr = 1;
1196                 if (NFSHASNFSV4(nmp)) {
1197                         nfhp = VTONFS(vp)->n_fhp;
1198                         error = nfscl_getstateid(vp, nfhp->nfh_fh,
1199                             nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1200                         if (error && vnode_vtype(vp) == VREG &&
1201                             (mode == NFSV4OPEN_ACCESSWRITE ||
1202                              nfstest_openallsetattr)) {
1203                                 /*
1204                                  * No Open stateid, so try and open the file
1205                                  * now.
1206                                  */
1207                                 if (mode == NFSV4OPEN_ACCESSWRITE)
1208                                         openerr = nfsrpc_open(vp, FWRITE, cred,
1209                                             p);
1210                                 else
1211                                         openerr = nfsrpc_open(vp, FREAD, cred,
1212                                             p);
1213                                 if (!openerr)
1214                                         (void) nfscl_getstateid(vp,
1215                                             nfhp->nfh_fh, nfhp->nfh_len,
1216                                             mode, 0, cred, p, &stateid, &lckp);
1217                         }
1218                 }
1219                 if (vap != NULL)
1220                         error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1221                             rnap, attrflagp, stuff);
1222                 else
1223                         error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
1224                             stuff);
1225                 if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1226                         NFSLOCKMNT(nmp);
1227                         nmp->nm_state |= NFSSTA_OPENMODE;
1228                         NFSUNLOCKMNT(nmp);
1229                 }
1230                 if (error == NFSERR_STALESTATEID)
1231                         nfscl_initiate_recovery(nmp->nm_clp);
1232                 if (lckp != NULL)
1233                         nfscl_lockderef(lckp);
1234                 if (!openerr)
1235                         (void) nfsrpc_close(vp, 0, p);
1236                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1237                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1238                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1239                         (void) nfs_catnap(PZERO, error, "nfs_setattr");
1240                 } else if ((error == NFSERR_EXPIRED ||
1241                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1242                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1243                 }
1244                 retrycnt++;
1245         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1246             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1247             error == NFSERR_BADSESSION ||
1248             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1249             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1250              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1251             (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1252              retrycnt < 4));
1253         if (error && retrycnt >= 4)
1254                 error = EIO;
1255         return (error);
1256 }
1257
1258 static int
1259 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1260     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1261     struct nfsvattr *rnap, int *attrflagp, void *stuff)
1262 {
1263         u_int32_t *tl;
1264         struct nfsrv_descript nfsd, *nd = &nfsd;
1265         int error;
1266         nfsattrbit_t attrbits;
1267
1268         *attrflagp = 0;
1269         NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp);
1270         if (nd->nd_flag & ND_NFSV4)
1271                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1272         vap->va_type = vnode_vtype(vp);
1273         nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1274         if (nd->nd_flag & ND_NFSV3) {
1275                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1276                 *tl = newnfs_false;
1277         } else if (nd->nd_flag & ND_NFSV4) {
1278                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1279                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1280                 NFSGETATTR_ATTRBIT(&attrbits);
1281                 (void) nfsrv_putattrbit(nd, &attrbits);
1282         }
1283         error = nfscl_request(nd, vp, p, cred, stuff);
1284         if (error)
1285                 return (error);
1286         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1287                 error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff);
1288         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1289                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1290         if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1291                 error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
1292         mbuf_freem(nd->nd_mrep);
1293         if (nd->nd_repstat && !error)
1294                 error = nd->nd_repstat;
1295         return (error);
1296 }
1297
1298 /*
1299  * nfs lookup rpc
1300  */
1301 APPLESTATIC int
1302 nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
1303     NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
1304     struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff)
1305 {
1306         u_int32_t *tl;
1307         struct nfsrv_descript nfsd, *nd = &nfsd;
1308         struct nfsmount *nmp;
1309         struct nfsnode *np;
1310         struct nfsfh *nfhp;
1311         nfsattrbit_t attrbits;
1312         int error = 0, lookupp = 0;
1313
1314         *attrflagp = 0;
1315         *dattrflagp = 0;
1316         if (vnode_vtype(dvp) != VDIR)
1317                 return (ENOTDIR);
1318         nmp = VFSTONFS(vnode_mount(dvp));
1319         if (len > NFS_MAXNAMLEN)
1320                 return (ENAMETOOLONG);
1321         if (NFSHASNFSV4(nmp) && len == 1 &&
1322                 name[0] == '.') {
1323                 /*
1324                  * Just return the current dir's fh.
1325                  */
1326                 np = VTONFS(dvp);
1327                 nfhp = malloc(sizeof (struct nfsfh) +
1328                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1329                 nfhp->nfh_len = np->n_fhp->nfh_len;
1330                 NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1331                 *nfhpp = nfhp;
1332                 return (0);
1333         }
1334         if (NFSHASNFSV4(nmp) && len == 2 &&
1335                 name[0] == '.' && name[1] == '.') {
1336                 lookupp = 1;
1337                 NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp);
1338         } else {
1339                 NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp);
1340                 (void) nfsm_strtom(nd, name, len);
1341         }
1342         if (nd->nd_flag & ND_NFSV4) {
1343                 NFSGETATTR_ATTRBIT(&attrbits);
1344                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1345                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
1346                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1347                 (void) nfsrv_putattrbit(nd, &attrbits);
1348         }
1349         error = nfscl_request(nd, dvp, p, cred, stuff);
1350         if (error)
1351                 return (error);
1352         if (nd->nd_repstat) {
1353                 /*
1354                  * When an NFSv4 Lookupp returns ENOENT, it means that
1355                  * the lookup is at the root of an fs, so return this dir.
1356                  */
1357                 if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
1358                     np = VTONFS(dvp);
1359                     nfhp = malloc(sizeof (struct nfsfh) +
1360                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1361                     nfhp->nfh_len = np->n_fhp->nfh_len;
1362                     NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1363                     *nfhpp = nfhp;
1364                     mbuf_freem(nd->nd_mrep);
1365                     return (0);
1366                 }
1367                 if (nd->nd_flag & ND_NFSV3)
1368                     error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1369                 else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1370                     ND_NFSV4) {
1371                         /* Load the directory attributes. */
1372                         error = nfsm_loadattr(nd, dnap);
1373                         if (error == 0)
1374                                 *dattrflagp = 1;
1375                 }
1376                 goto nfsmout;
1377         }
1378         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
1379                 /* Load the directory attributes. */
1380                 error = nfsm_loadattr(nd, dnap);
1381                 if (error != 0)
1382                         goto nfsmout;
1383                 *dattrflagp = 1;
1384                 /* Skip over the Lookup and GetFH operation status values. */
1385                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1386         }
1387         error = nfsm_getfh(nd, nfhpp);
1388         if (error)
1389                 goto nfsmout;
1390
1391         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1392         if ((nd->nd_flag & ND_NFSV3) && !error)
1393                 error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1394 nfsmout:
1395         mbuf_freem(nd->nd_mrep);
1396         if (!error && nd->nd_repstat)
1397                 error = nd->nd_repstat;
1398         return (error);
1399 }
1400
1401 /*
1402  * Do a readlink rpc.
1403  */
1404 APPLESTATIC int
1405 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1406     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1407 {
1408         u_int32_t *tl;
1409         struct nfsrv_descript nfsd, *nd = &nfsd;
1410         struct nfsnode *np = VTONFS(vp);
1411         nfsattrbit_t attrbits;
1412         int error, len, cangetattr = 1;
1413
1414         *attrflagp = 0;
1415         NFSCL_REQSTART(nd, NFSPROC_READLINK, vp);
1416         if (nd->nd_flag & ND_NFSV4) {
1417                 /*
1418                  * And do a Getattr op.
1419                  */
1420                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1421                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1422                 NFSGETATTR_ATTRBIT(&attrbits);
1423                 (void) nfsrv_putattrbit(nd, &attrbits);
1424         }
1425         error = nfscl_request(nd, vp, p, cred, stuff);
1426         if (error)
1427                 return (error);
1428         if (nd->nd_flag & ND_NFSV3)
1429                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1430         if (!nd->nd_repstat && !error) {
1431                 NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1432                 /*
1433                  * This seems weird to me, but must have been added to
1434                  * FreeBSD for some reason. The only thing I can think of
1435                  * is that there was/is some server that replies with
1436                  * more link data than it should?
1437                  */
1438                 if (len == NFS_MAXPATHLEN) {
1439                         NFSLOCKNODE(np);
1440                         if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1441                                 len = np->n_size;
1442                                 cangetattr = 0;
1443                         }
1444                         NFSUNLOCKNODE(np);
1445                 }
1446                 error = nfsm_mbufuio(nd, uiop, len);
1447                 if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1448                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1449         }
1450         if (nd->nd_repstat && !error)
1451                 error = nd->nd_repstat;
1452 nfsmout:
1453         mbuf_freem(nd->nd_mrep);
1454         return (error);
1455 }
1456
1457 /*
1458  * Read operation.
1459  */
1460 APPLESTATIC int
1461 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1462     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1463 {
1464         int error, expireret = 0, retrycnt;
1465         u_int32_t clidrev = 0;
1466         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1467         struct nfsnode *np = VTONFS(vp);
1468         struct ucred *newcred;
1469         struct nfsfh *nfhp = NULL;
1470         nfsv4stateid_t stateid;
1471         void *lckp;
1472
1473         if (nmp->nm_clp != NULL)
1474                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1475         newcred = cred;
1476         if (NFSHASNFSV4(nmp)) {
1477                 nfhp = np->n_fhp;
1478                 newcred = NFSNEWCRED(cred);
1479         }
1480         retrycnt = 0;
1481         do {
1482                 lckp = NULL;
1483                 if (NFSHASNFSV4(nmp))
1484                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1485                             NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1486                             &lckp);
1487                 error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1488                     attrflagp, stuff);
1489                 if (error == NFSERR_OPENMODE) {
1490                         NFSLOCKMNT(nmp);
1491                         nmp->nm_state |= NFSSTA_OPENMODE;
1492                         NFSUNLOCKMNT(nmp);
1493                 }
1494                 if (error == NFSERR_STALESTATEID)
1495                         nfscl_initiate_recovery(nmp->nm_clp);
1496                 if (lckp != NULL)
1497                         nfscl_lockderef(lckp);
1498                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1499                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1500                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1501                         (void) nfs_catnap(PZERO, error, "nfs_read");
1502                 } else if ((error == NFSERR_EXPIRED ||
1503                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1504                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1505                 }
1506                 retrycnt++;
1507         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1508             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1509             error == NFSERR_BADSESSION ||
1510             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1511             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1512              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1513             (error == NFSERR_OPENMODE && retrycnt < 4));
1514         if (error && retrycnt >= 4)
1515                 error = EIO;
1516         if (NFSHASNFSV4(nmp))
1517                 NFSFREECRED(newcred);
1518         return (error);
1519 }
1520
1521 /*
1522  * The actual read RPC.
1523  */
1524 static int
1525 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1526     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1527     int *attrflagp, void *stuff)
1528 {
1529         u_int32_t *tl;
1530         int error = 0, len, retlen, tsiz, eof = 0;
1531         struct nfsrv_descript nfsd;
1532         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1533         struct nfsrv_descript *nd = &nfsd;
1534         int rsize;
1535         off_t tmp_off;
1536
1537         *attrflagp = 0;
1538         tsiz = uio_uio_resid(uiop);
1539         tmp_off = uiop->uio_offset + tsiz;
1540         NFSLOCKMNT(nmp);
1541         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1542                 NFSUNLOCKMNT(nmp);
1543                 return (EFBIG);
1544         }
1545         rsize = nmp->nm_rsize;
1546         NFSUNLOCKMNT(nmp);
1547         nd->nd_mrep = NULL;
1548         while (tsiz > 0) {
1549                 *attrflagp = 0;
1550                 len = (tsiz > rsize) ? rsize : tsiz;
1551                 NFSCL_REQSTART(nd, NFSPROC_READ, vp);
1552                 if (nd->nd_flag & ND_NFSV4)
1553                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1554                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1555                 if (nd->nd_flag & ND_NFSV2) {
1556                         *tl++ = txdr_unsigned(uiop->uio_offset);
1557                         *tl++ = txdr_unsigned(len);
1558                         *tl = 0;
1559                 } else {
1560                         txdr_hyper(uiop->uio_offset, tl);
1561                         *(tl + 2) = txdr_unsigned(len);
1562                 }
1563                 /*
1564                  * Since I can't do a Getattr for NFSv4 for Write, there
1565                  * doesn't seem any point in doing one here, either.
1566                  * (See the comment in nfsrpc_writerpc() for more info.)
1567                  */
1568                 error = nfscl_request(nd, vp, p, cred, stuff);
1569                 if (error)
1570                         return (error);
1571                 if (nd->nd_flag & ND_NFSV3) {
1572                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1573                 } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1574                         error = nfsm_loadattr(nd, nap);
1575                         if (!error)
1576                                 *attrflagp = 1;
1577                 }
1578                 if (nd->nd_repstat || error) {
1579                         if (!error)
1580                                 error = nd->nd_repstat;
1581                         goto nfsmout;
1582                 }
1583                 if (nd->nd_flag & ND_NFSV3) {
1584                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1585                         eof = fxdr_unsigned(int, *(tl + 1));
1586                 } else if (nd->nd_flag & ND_NFSV4) {
1587                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1588                         eof = fxdr_unsigned(int, *tl);
1589                 }
1590                 NFSM_STRSIZ(retlen, len);
1591                 error = nfsm_mbufuio(nd, uiop, retlen);
1592                 if (error)
1593                         goto nfsmout;
1594                 mbuf_freem(nd->nd_mrep);
1595                 nd->nd_mrep = NULL;
1596                 tsiz -= retlen;
1597                 if (!(nd->nd_flag & ND_NFSV2)) {
1598                         if (eof || retlen == 0)
1599                                 tsiz = 0;
1600                 } else if (retlen < len)
1601                         tsiz = 0;
1602         }
1603         return (0);
1604 nfsmout:
1605         if (nd->nd_mrep != NULL)
1606                 mbuf_freem(nd->nd_mrep);
1607         return (error);
1608 }
1609
1610 /*
1611  * nfs write operation
1612  * When called_from_strategy != 0, it should return EIO for an error that
1613  * indicates recovery is in progress, so that the buffer will be left
1614  * dirty and be written back to the server later. If it loops around,
1615  * the recovery thread could get stuck waiting for the buffer and recovery
1616  * will then deadlock.
1617  */
1618 APPLESTATIC int
1619 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1620     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1621     void *stuff, int called_from_strategy)
1622 {
1623         int error, expireret = 0, retrycnt, nostateid;
1624         u_int32_t clidrev = 0;
1625         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1626         struct nfsnode *np = VTONFS(vp);
1627         struct ucred *newcred;
1628         struct nfsfh *nfhp = NULL;
1629         nfsv4stateid_t stateid;
1630         void *lckp;
1631
1632         *must_commit = 0;
1633         if (nmp->nm_clp != NULL)
1634                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1635         newcred = cred;
1636         if (NFSHASNFSV4(nmp)) {
1637                 newcred = NFSNEWCRED(cred);
1638                 nfhp = np->n_fhp;
1639         }
1640         retrycnt = 0;
1641         do {
1642                 lckp = NULL;
1643                 nostateid = 0;
1644                 if (NFSHASNFSV4(nmp)) {
1645                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1646                             NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1647                             &lckp);
1648                         if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1649                             stateid.other[2] == 0) {
1650                                 nostateid = 1;
1651                                 NFSCL_DEBUG(1, "stateid0 in write\n");
1652                         }
1653                 }
1654
1655                 /*
1656                  * If there is no stateid for NFSv4, it means this is an
1657                  * extraneous write after close. Basically a poorly
1658                  * implemented buffer cache. Just don't do the write.
1659                  */
1660                 if (nostateid)
1661                         error = 0;
1662                 else
1663                         error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1664                             newcred, &stateid, p, nap, attrflagp, stuff);
1665                 if (error == NFSERR_STALESTATEID)
1666                         nfscl_initiate_recovery(nmp->nm_clp);
1667                 if (lckp != NULL)
1668                         nfscl_lockderef(lckp);
1669                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1670                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1671                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1672                         (void) nfs_catnap(PZERO, error, "nfs_write");
1673                 } else if ((error == NFSERR_EXPIRED ||
1674                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1675                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1676                 }
1677                 retrycnt++;
1678         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
1679             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1680               error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
1681             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1682             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1683              expireret == 0 && clidrev != 0 && retrycnt < 4));
1684         if (error != 0 && (retrycnt >= 4 ||
1685             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1686               error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
1687                 error = EIO;
1688         if (NFSHASNFSV4(nmp))
1689                 NFSFREECRED(newcred);
1690         return (error);
1691 }
1692
1693 /*
1694  * The actual write RPC.
1695  */
1696 static int
1697 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
1698     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
1699     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1700 {
1701         u_int32_t *tl;
1702         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1703         struct nfsnode *np = VTONFS(vp);
1704         int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC;
1705         int wccflag = 0, wsize;
1706         int32_t backup;
1707         struct nfsrv_descript nfsd;
1708         struct nfsrv_descript *nd = &nfsd;
1709         nfsattrbit_t attrbits;
1710         off_t tmp_off;
1711
1712         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
1713         *attrflagp = 0;
1714         tsiz = uio_uio_resid(uiop);
1715         tmp_off = uiop->uio_offset + tsiz;
1716         NFSLOCKMNT(nmp);
1717         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1718                 NFSUNLOCKMNT(nmp);
1719                 return (EFBIG);
1720         }
1721         wsize = nmp->nm_wsize;
1722         NFSUNLOCKMNT(nmp);
1723         nd->nd_mrep = NULL;     /* NFSv2 sometimes does a write with */
1724         nd->nd_repstat = 0;     /* uio_resid == 0, so the while is not done */
1725         while (tsiz > 0) {
1726                 *attrflagp = 0;
1727                 len = (tsiz > wsize) ? wsize : tsiz;
1728                 NFSCL_REQSTART(nd, NFSPROC_WRITE, vp);
1729                 if (nd->nd_flag & ND_NFSV4) {
1730                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1731                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
1732                         txdr_hyper(uiop->uio_offset, tl);
1733                         tl += 2;
1734                         *tl++ = txdr_unsigned(*iomode);
1735                         *tl = txdr_unsigned(len);
1736                 } else if (nd->nd_flag & ND_NFSV3) {
1737                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
1738                         txdr_hyper(uiop->uio_offset, tl);
1739                         tl += 2;
1740                         *tl++ = txdr_unsigned(len);
1741                         *tl++ = txdr_unsigned(*iomode);
1742                         *tl = txdr_unsigned(len);
1743                 } else {
1744                         u_int32_t x;
1745
1746                         NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1747                         /*
1748                          * Not sure why someone changed this, since the
1749                          * RFC clearly states that "beginoffset" and
1750                          * "totalcount" are ignored, but it wouldn't
1751                          * surprise me if there's a busted server out there.
1752                          */
1753                         /* Set both "begin" and "current" to non-garbage. */
1754                         x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1755                         *tl++ = x;      /* "begin offset" */
1756                         *tl++ = x;      /* "current offset" */
1757                         x = txdr_unsigned(len);
1758                         *tl++ = x;      /* total to this offset */
1759                         *tl = x;        /* size of this write */
1760
1761                 }
1762                 nfsm_uiombuf(nd, uiop, len);
1763                 /*
1764                  * Although it is tempting to do a normal Getattr Op in the
1765                  * NFSv4 compound, the result can be a nearly hung client
1766                  * system if the Getattr asks for Owner and/or OwnerGroup.
1767                  * It occurs when the client can't map either the Owner or
1768                  * Owner_group name in the Getattr reply to a uid/gid. When
1769                  * there is a cache miss, the kernel does an upcall to the
1770                  * nfsuserd. Then, it can try and read the local /etc/passwd
1771                  * or /etc/group file. It can then block in getnewbuf(),
1772                  * waiting for dirty writes to be pushed to the NFS server.
1773                  * The only reason this doesn't result in a complete
1774                  * deadlock, is that the upcall times out and allows
1775                  * the write to complete. However, progress is so slow
1776                  * that it might just as well be deadlocked.
1777                  * As such, we get the rest of the attributes, but not
1778                  * Owner or Owner_group.
1779                  * nb: nfscl_loadattrcache() needs to be told that these
1780                  *     partial attributes from a write rpc are being
1781                  *     passed in, via a argument flag.
1782                  */
1783                 if (nd->nd_flag & ND_NFSV4) {
1784                         NFSWRITEGETATTR_ATTRBIT(&attrbits);
1785                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1786                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
1787                         (void) nfsrv_putattrbit(nd, &attrbits);
1788                 }
1789                 error = nfscl_request(nd, vp, p, cred, stuff);
1790                 if (error)
1791                         return (error);
1792                 if (nd->nd_repstat) {
1793                         /*
1794                          * In case the rpc gets retried, roll
1795                          * the uio fileds changed by nfsm_uiombuf()
1796                          * back.
1797                          */
1798                         uiop->uio_offset -= len;
1799                         uio_uio_resid_add(uiop, len);
1800                         uio_iov_base_add(uiop, -len);
1801                         uio_iov_len_add(uiop, len);
1802                 }
1803                 if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1804                         error = nfscl_wcc_data(nd, vp, nap, attrflagp,
1805                             &wccflag, stuff);
1806                         if (error)
1807                                 goto nfsmout;
1808                 }
1809                 if (!nd->nd_repstat) {
1810                         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1811                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
1812                                         + NFSX_VERF);
1813                                 rlen = fxdr_unsigned(int, *tl++);
1814                                 if (rlen == 0) {
1815                                         error = NFSERR_IO;
1816                                         goto nfsmout;
1817                                 } else if (rlen < len) {
1818                                         backup = len - rlen;
1819                                         uio_iov_base_add(uiop, -(backup));
1820                                         uio_iov_len_add(uiop, backup);
1821                                         uiop->uio_offset -= backup;
1822                                         uio_uio_resid_add(uiop, backup);
1823                                         len = rlen;
1824                                 }
1825                                 commit = fxdr_unsigned(int, *tl++);
1826
1827                                 /*
1828                                  * Return the lowest commitment level
1829                                  * obtained by any of the RPCs.
1830                                  */
1831                                 if (committed == NFSWRITE_FILESYNC)
1832                                         committed = commit;
1833                                 else if (committed == NFSWRITE_DATASYNC &&
1834                                         commit == NFSWRITE_UNSTABLE)
1835                                         committed = commit;
1836                                 NFSLOCKMNT(nmp);
1837                                 if (!NFSHASWRITEVERF(nmp)) {
1838                                         NFSBCOPY((caddr_t)tl,
1839                                             (caddr_t)&nmp->nm_verf[0],
1840                                             NFSX_VERF);
1841                                         NFSSETWRITEVERF(nmp);
1842                                 } else if (NFSBCMP(tl, nmp->nm_verf,
1843                                     NFSX_VERF)) {
1844                                         *must_commit = 1;
1845                                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
1846                                 }
1847                                 NFSUNLOCKMNT(nmp);
1848                         }
1849                         if (nd->nd_flag & ND_NFSV4)
1850                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1851                         if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
1852                                 error = nfsm_loadattr(nd, nap);
1853                                 if (!error)
1854                                         *attrflagp = NFS_LATTR_NOSHRINK;
1855                         }
1856                 } else {
1857                         error = nd->nd_repstat;
1858                 }
1859                 if (error)
1860                         goto nfsmout;
1861                 NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
1862                 mbuf_freem(nd->nd_mrep);
1863                 nd->nd_mrep = NULL;
1864                 tsiz -= len;
1865         }
1866 nfsmout:
1867         if (nd->nd_mrep != NULL)
1868                 mbuf_freem(nd->nd_mrep);
1869         *iomode = committed;
1870         if (nd->nd_repstat && !error)
1871                 error = nd->nd_repstat;
1872         return (error);
1873 }
1874
1875 /*
1876  * nfs mknod rpc
1877  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1878  * mode set to specify the file type and the size field for rdev.
1879  */
1880 APPLESTATIC int
1881 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1882     u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
1883     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1884     int *attrflagp, int *dattrflagp, void *dstuff)
1885 {
1886         u_int32_t *tl;
1887         int error = 0;
1888         struct nfsrv_descript nfsd, *nd = &nfsd;
1889         nfsattrbit_t attrbits;
1890
1891         *nfhpp = NULL;
1892         *attrflagp = 0;
1893         *dattrflagp = 0;
1894         if (namelen > NFS_MAXNAMLEN)
1895                 return (ENAMETOOLONG);
1896         NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp);
1897         if (nd->nd_flag & ND_NFSV4) {
1898                 if (vtyp == VBLK || vtyp == VCHR) {
1899                         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1900                         *tl++ = vtonfsv34_type(vtyp);
1901                         *tl++ = txdr_unsigned(NFSMAJOR(rdev));
1902                         *tl = txdr_unsigned(NFSMINOR(rdev));
1903                 } else {
1904                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1905                         *tl = vtonfsv34_type(vtyp);
1906                 }
1907         }
1908         (void) nfsm_strtom(nd, name, namelen);
1909         if (nd->nd_flag & ND_NFSV3) {
1910                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1911                 *tl = vtonfsv34_type(vtyp);
1912         }
1913         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1914                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
1915         if ((nd->nd_flag & ND_NFSV3) &&
1916             (vtyp == VCHR || vtyp == VBLK)) {
1917                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1918                 *tl++ = txdr_unsigned(NFSMAJOR(rdev));
1919                 *tl = txdr_unsigned(NFSMINOR(rdev));
1920         }
1921         if (nd->nd_flag & ND_NFSV4) {
1922                 NFSGETATTR_ATTRBIT(&attrbits);
1923                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1924                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
1925                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1926                 (void) nfsrv_putattrbit(nd, &attrbits);
1927         }
1928         if (nd->nd_flag & ND_NFSV2)
1929                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
1930         error = nfscl_request(nd, dvp, p, cred, dstuff);
1931         if (error)
1932                 return (error);
1933         if (nd->nd_flag & ND_NFSV4)
1934                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
1935         if (!nd->nd_repstat) {
1936                 if (nd->nd_flag & ND_NFSV4) {
1937                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1938                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1939                         if (error)
1940                                 goto nfsmout;
1941                 }
1942                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
1943                 if (error)
1944                         goto nfsmout;
1945         }
1946         if (nd->nd_flag & ND_NFSV3)
1947                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
1948         if (!error && nd->nd_repstat)
1949                 error = nd->nd_repstat;
1950 nfsmout:
1951         mbuf_freem(nd->nd_mrep);
1952         return (error);
1953 }
1954
1955 /*
1956  * nfs file create call
1957  * Mostly just call the approriate routine. (I separated out v4, so that
1958  * error recovery wouldn't be as difficult.)
1959  */
1960 APPLESTATIC int
1961 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1962     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
1963     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1964     int *attrflagp, int *dattrflagp, void *dstuff)
1965 {
1966         int error = 0, newone, expireret = 0, retrycnt, unlocked;
1967         struct nfsclowner *owp;
1968         struct nfscldeleg *dp;
1969         struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp));
1970         u_int32_t clidrev;
1971
1972         if (NFSHASNFSV4(nmp)) {
1973             retrycnt = 0;
1974             do {
1975                 dp = NULL;
1976                 error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
1977                     NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
1978                     NULL, 1);
1979                 if (error)
1980                         return (error);
1981                 if (nmp->nm_clp != NULL)
1982                         clidrev = nmp->nm_clp->nfsc_clientidrev;
1983                 else
1984                         clidrev = 0;
1985                 if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
1986                     nfs_numnfscbd == 0 || retrycnt > 0)
1987                         error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
1988                           fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
1989                           attrflagp, dattrflagp, dstuff, &unlocked);
1990                 else
1991                         error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
1992                           cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
1993                           attrflagp, dattrflagp, dstuff, &unlocked);
1994                 /*
1995                  * There is no need to invalidate cached attributes here,
1996                  * since new post-delegation issue attributes are always
1997                  * returned by nfsrpc_createv4() and these will update the
1998                  * attribute cache.
1999                  */
2000                 if (dp != NULL)
2001                         (void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
2002                             (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
2003                 nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
2004                 if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2005                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2006                     error == NFSERR_BADSESSION) {
2007                         (void) nfs_catnap(PZERO, error, "nfs_open");
2008                 } else if ((error == NFSERR_EXPIRED ||
2009                     error == NFSERR_BADSTATEID) && clidrev != 0) {
2010                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2011                         retrycnt++;
2012                 }
2013             } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2014                 error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2015                 error == NFSERR_BADSESSION ||
2016                 ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2017                  expireret == 0 && clidrev != 0 && retrycnt < 4));
2018             if (error && retrycnt >= 4)
2019                     error = EIO;
2020         } else {
2021                 error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
2022                     fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
2023                     dstuff);
2024         }
2025         return (error);
2026 }
2027
2028 /*
2029  * The create rpc for v2 and 3.
      *
      * Builds an NFSPROC_CREATE request for "name" in directory dvp, sends it
      * with nfscl_request() and parses the reply.
      *  - *nfhpp, *attrflagp and *dattrflagp are cleared on entry.
      *  - When ND_NFSV3 is set, O_EXCL in fmode selects an exclusive create
      *    that carries the verifier cverf; otherwise an unchecked create with
      *    the attributes from vap is requested.
      *  - When ND_NFSV3 is not set, the attributes from vap are always sent
      *    (with NFSSATTR_SIZE0).
      * Returns ENAMETOOLONG if namelen exceeds NFS_MAXNAMLEN, the error from
      * nfscl_request() or from parsing the reply, otherwise nd_repstat.
2030  */
2031 static int
2032 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2033     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2034     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2035     int *attrflagp, int *dattrflagp, void *dstuff)
2036 {
2037         u_int32_t *tl;
2038         int error = 0;
2039         struct nfsrv_descript nfsd, *nd = &nfsd;
2040
2041         *nfhpp = NULL;
2042         *attrflagp = 0;
2043         *dattrflagp = 0;
2044         if (namelen > NFS_MAXNAMLEN)
2045                 return (ENAMETOOLONG);
2046         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2047         (void) nfsm_strtom(nd, name, namelen);
2048         if (nd->nd_flag & ND_NFSV3) {
                     /* One word for the create mode. */
2049                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2050                 if (fmode & O_EXCL) {
                             /* Exclusive create: the mode is followed by the verifier. */
2051                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2052                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2053                         *tl++ = cverf.lval[0];
2054                         *tl = cverf.lval[1];
2055                 } else {
                             /* Unchecked create: the mode is followed by the attributes. */
2056                         *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2057                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
2058                 }
2059         } else {
2060                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2061         }
2062         error = nfscl_request(nd, dvp, p, cred, dstuff);
             /* On a request failure, return directly; nd_mrep is not freed here. */
2063         if (error)
2064                 return (error);
2065         if (nd->nd_repstat == 0) {
                     /* Parse the new file's handle and attributes. */
2066                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2067                 if (error)
2068                         goto nfsmout;
2069         }
             /* Only NFSv3 replies are parsed for the directory's wcc data. */
2070         if (nd->nd_flag & ND_NFSV3)
2071                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
             /* A parse error takes precedence over the server's reply status. */
2072         if (nd->nd_repstat != 0 && error == 0)
2073                 error = nd->nd_repstat;
2074 nfsmout:
2075         mbuf_freem(nd->nd_mrep);
2076         return (error);
2077 }
2078
/*
 * The create rpc for NFSv4, which is done as an Open with NFSV4OPEN_CREATE.
 *
 * After NFSCL_REQSTART(), the Open arguments are built, followed by Getfh
 * and Getattr for the new file, then Putfh of the directory and a Getattr
 * for the directory.
 *  - owp is the open owner that supplies the seqid and the owner name.
 *  - *nfhpp, *dpp, *attrflagp, *dattrflagp and *unlockedp are cleared on entry.
 *  - On success, *nfhpp/nnap/*attrflagp are filled in by nfscl_mtofh(),
 *    dnap is loaded with the directory attributes (*dattrflagp set to 1)
 *    and *dpp is set to the delegation structure, if one was allocated.
 *  - *unlockedp is set to 1 once nfscl_openrelease() has been called.
 * Returns ENAMETOOLONG if namelen exceeds NFS_MAXNAMLEN, otherwise 0 or an
 * error. On error, any delegation structure still held in dp is freed.
 */
2079 static int
2080 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2081     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2082     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2083     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2084     int *dattrflagp, void *dstuff, int *unlockedp)
2085 {
2086         u_int32_t *tl;
2087         int error = 0, deleg, newone, ret, acesize, limitby;
2088         struct nfsrv_descript nfsd, *nd = &nfsd;
2089         struct nfsclopen *op;
2090         struct nfscldeleg *dp = NULL;
2091         struct nfsnode *np;
2092         struct nfsfh *nfhp;
2093         nfsattrbit_t attrbits;
2094         nfsv4stateid_t stateid;
2095         u_int32_t rflags;
2096         struct nfsmount *nmp;
2097         struct nfsclsession *tsep;
2098
2099         nmp = VFSTONFS(dvp->v_mount);
2100         np = VTONFS(dvp);
2101         *unlockedp = 0;
2102         *nfhpp = NULL;
2103         *dpp = NULL;
2104         *attrflagp = 0;
2105         *dattrflagp = 0;
2106         if (namelen > NFS_MAXNAMLEN)
2107                 return (ENAMETOOLONG);
2108         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2109         /*
2110          * For V4, this is actually an Open op.
              * The five words are the open owner's seqid, the share access,
              * the share deny and the two words of the session's clientid.
2111          */
2112         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2113         *tl++ = txdr_unsigned(owp->nfsow_seqid);
2114         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2115             NFSV4OPEN_ACCESSREAD);
2116         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2117         tsep = nfsmnt_mdssession(nmp);
2118         *tl++ = tsep->nfsess_clientid.lval[0];
2119         *tl = tsep->nfsess_clientid.lval[1];
2120         (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
             /* Open type (create) followed by the create mode. */
2121         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2122         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2123         if (fmode & O_EXCL) {
2124                 if (NFSHASNFSV4N(nmp)) {
2125                         if (NFSHASSESSPERSIST(nmp)) {
2126                                 /* Use GUARDED for persistent sessions. */
2127                                 *tl = txdr_unsigned(NFSCREATE_GUARDED);
2128                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2129                         } else {
2130                                 /* Otherwise, use EXCLUSIVE4_1. */
2131                                 *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2132                                 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2133                                 *tl++ = cverf.lval[0];
2134                                 *tl = cverf.lval[1];
2135                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2136                         }
2137                 } else {
2138                         /* NFSv4.0 */
                             /* Only the verifier is sent here, no attributes. */
2139                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2140                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2141                         *tl++ = cverf.lval[0];
2142                         *tl = cverf.lval[1];
2143                 }
2144         } else {
2145                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2146                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2147         }
             /* The claim type, followed by the name of the file to create. */
2148         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2149         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2150         (void) nfsm_strtom(nd, name, namelen);
2151         /* Get the new file's handle and attributes. */
2152         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2153         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2154         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2155         NFSGETATTR_ATTRBIT(&attrbits);
2156         (void) nfsrv_putattrbit(nd, &attrbits);
2157         /* Get the directory's post-op attributes. */
2158         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2159         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2160         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2161         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2162         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2163         (void) nfsrv_putattrbit(nd, &attrbits);
2164         error = nfscl_request(nd, dvp, p, cred, dstuff);
             /* On a request failure, return directly; nd_mrep is not freed here. */
2165         if (error)
2166                 return (error);
2167         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2168         if (nd->nd_repstat == 0) {
                     /*
                      * The Open reply starts with the open stateid, followed by
                      * six more words. Only the last of those six, the result
                      * flags, is used; the five words before it are skipped.
                      */
2169                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2170                     6 * NFSX_UNSIGNED);
2171                 stateid.seqid = *tl++;
2172                 stateid.other[0] = *tl++;
2173                 stateid.other[1] = *tl++;
2174                 stateid.other[2] = *tl;
2175                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
                     /* Parse the attribute bitmap that follows; the result is ignored. */
2176                 (void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
                     /* The delegation type. */
2177                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2178                 deleg = fxdr_unsigned(int, *tl);
2179                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
2180                     deleg == NFSV4OPEN_DELEGATEWRITE) {
                             /* Set both flags the first time a delegation is seen. */
2181                         if (!(owp->nfsow_clp->nfsc_flags &
2182                               NFSCLFLAGS_FIRSTDELEG))
2183                                 owp->nfsow_clp->nfsc_flags |=
2184                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
                             /*
                              * Allocate the delegation structure with room for a
                              * file handle of up to NFSX_V4FHMAX bytes after it.
                              */
2185                         dp = malloc(
2186                             sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2187                             M_NFSCLDELEG, M_WAITOK);
2188                         LIST_INIT(&dp->nfsdl_owner);
2189                         LIST_INIT(&dp->nfsdl_lock);
2190                         dp->nfsdl_clp = owp->nfsow_clp;
2191                         newnfs_copyincred(cred, &dp->nfsdl_cred);
2192                         nfscl_lockinit(&dp->nfsdl_rwlock);
                             /* The delegation stateid and one more word (the recall flag). */
2193                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2194                             NFSX_UNSIGNED);
2195                         dp->nfsdl_stateid.seqid = *tl++;
2196                         dp->nfsdl_stateid.other[0] = *tl++;
2197                         dp->nfsdl_stateid.other[1] = *tl++;
2198                         dp->nfsdl_stateid.other[2] = *tl++;
2199                         ret = fxdr_unsigned(int, *tl);
2200                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2201                                 dp->nfsdl_flags = NFSCLDL_WRITE;
2202                                 /*
2203                                  * Indicates how much the file can grow.
                                      * The limit type is followed by two words:
                                      * either a 64bit size or two values that
                                      * are multiplied together.
2204                                  */
2205                                 NFSM_DISSECT(tl, u_int32_t *,
2206                                     3 * NFSX_UNSIGNED);
2207                                 limitby = fxdr_unsigned(int, *tl++);
2208                                 switch (limitby) {
2209                                 case NFSV4OPEN_LIMITSIZE:
2210                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
2211                                         break;
2212                                 case NFSV4OPEN_LIMITBLOCKS:
2213                                         dp->nfsdl_sizelimit =
2214                                             fxdr_unsigned(u_int64_t, *tl++);
2215                                         dp->nfsdl_sizelimit *=
2216                                             fxdr_unsigned(u_int64_t, *tl);
2217                                         break;
2218                                 default:
2219                                         error = NFSERR_BADXDR;
2220                                         goto nfsmout;
2221                                 }
2222                         } else {
2223                                 dp->nfsdl_flags = NFSCLDL_READ;
2224                         }
                             /* ret still holds the word dissected after the stateid. */
2225                         if (ret)
2226                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
                             /* From here on, ret is reused as an output argument. */
2227                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
2228                             &acesize, p);
2229                         if (error)
2230                                 goto nfsmout;
2231                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
2232                         error = NFSERR_BADXDR;
2233                         goto nfsmout;
2234                 }
2235                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2236                 if (error)
2237                         goto nfsmout;
2238                 /* Get rid of the PutFH and Getattr status values. */
2239                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2240                 /* Load the directory attributes. */
2241                 error = nfsm_loadattr(nd, dnap);
2242                 if (error)
2243                         goto nfsmout;
2244                 *dattrflagp = 1;
                     /* Record the new file's change and modify time in the delegation. */
2245                 if (dp != NULL && *attrflagp) {
2246                         dp->nfsdl_change = nnap->na_filerev;
2247                         dp->nfsdl_modtime = nnap->na_mtime;
2248                         dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2249                 }
2250                 /*
2251                  * We can now complete the Open state.
2252                  */
2253                 nfhp = *nfhpp;
2254                 if (dp != NULL) {
2255                         dp->nfsdl_fhlen = nfhp->nfh_len;
2256                         NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2257                 }
2258                 /*
2259                  * Get an Open structure that will be
2260                  * attached to the OpenOwner, acquired already.
2261                  */
2262                 error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
2263                     (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2264                     cred, p, NULL, &op, &newone, NULL, 0);
2265                 if (error)
2266                         goto nfsmout;
2267                 op->nfso_stateid = stateid;
2268                 newnfs_copyincred(cred, &op->nfso_cred);
                     /*
                      * If the server asked for it, do the OpenConfirm, retrying
                      * for as long as the reply is NFSERR_DELAY.
                      */
2269                 if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2270                     do {
2271                         ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2272                             nfhp->nfh_len, op, cred, p);
2273                         if (ret == NFSERR_DELAY)
2274                             (void) nfs_catnap(PZERO, ret, "nfs_create");
2275                     } while (ret == NFSERR_DELAY);
2276                     error = ret;
2277                 }
2278
2279                 /*
2280                  * If the server is handing out delegations, but we didn't
2281                  * get one because an OpenConfirm was required, try the
2282                  * Open again, to get a delegation. This is a harmless no-op,
2283                  * from a server's point of view.
2284                  */
2285                 if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2286                     (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2287                     !error && dp == NULL) {
2288                     do {
2289                         ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp,
2290                             np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2291                             nfhp->nfh_fh, nfhp->nfh_len,
2292                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2293                             name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2294                         if (ret == NFSERR_DELAY)
2295                             (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2296                     } while (ret == NFSERR_DELAY);
                         /*
                          * A failure of this second Open only fails the create
                          * for the three errors checked below; any delegation
                          * structure it returned is discarded.
                          */
2297                     if (ret) {
2298                         if (dp != NULL) {
2299                                 free(dp, M_NFSCLDELEG);
2300                                 dp = NULL;
2301                         }
2302                         if (ret == NFSERR_STALECLIENTID ||
2303                             ret == NFSERR_STALEDONTRECOVER ||
2304                             ret == NFSERR_BADSESSION)
2305                                 error = ret;
2306                     }
2307                 }
2308                 nfscl_openrelease(nmp, op, error, newone);
2309                 *unlockedp = 1;
2310         }
             /* A parse or local error takes precedence over the reply status. */
2311         if (nd->nd_repstat != 0 && error == 0)
2312                 error = nd->nd_repstat;
2313         if (error == NFSERR_STALECLIENTID)
2314                 nfscl_initiate_recovery(owp->nfsow_clp);
2315 nfsmout:
             /* Hand the delegation to the caller only on success, else free it. */
2316         if (!error)
2317                 *dpp = dp;
2318         else if (dp != NULL)
2319                 free(dp, M_NFSCLDELEG);
2320         mbuf_freem(nd->nd_mrep);
2321         return (error);
2322 }
2323
2324 /*
2325  * Nfs remove rpc
      *
      * Removes "name" from directory dvp; vp is the vnode of the file being
      * removed and is used to look for a delegation on NFSv4 mounts.
      * When nfscl_removedeleg() returns 1, the delegation stateid is sent
      * ahead of the Remove in one NFSPROC_RETDELEGREMOVE request. If that
      * request fails, it is redone as a plain NFSPROC_REMOVE.
      * For NFSv3 and NFSv4 the directory's wcc data is returned via dnap and
      * *dattrflagp.
      * Returns ENAMETOOLONG if namelen exceeds NFS_MAXNAMLEN, the error from
      * nfscl_request() or from parsing the reply, otherwise nd_repstat.
2326  */
2327 APPLESTATIC int
2328 nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2329     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp,
2330     void *dstuff)
2331 {
2332         u_int32_t *tl;
2333         struct nfsrv_descript nfsd, *nd = &nfsd;
2334         struct nfsnode *np;
2335         struct nfsmount *nmp;
2336         nfsv4stateid_t dstateid;
2337         int error, ret = 0, i;
2338
2339         *dattrflagp = 0;
2340         if (namelen > NFS_MAXNAMLEN)
2341                 return (ENAMETOOLONG);
2342         nmp = VFSTONFS(vnode_mount(dvp));
2343 tryagain:
             /*
              * On the first pass ret is 0, so the delegation is looked for.
              * After a "goto tryagain" ret is non-zero, so the else clause
              * resets it to 0 and a plain Remove is built instead.
              */
2344         if (NFSHASNFSV4(nmp) && ret == 0) {
2345                 ret = nfscl_removedeleg(vp, p, &dstateid);
2346                 if (ret == 1) {
2347                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp);
                             /* The delegation stateid plus one word for the Putfh op. */
2348                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2349                             NFSX_UNSIGNED);
                             /* The stateid's seqid is sent as 0 when NFSHASNFSV4N() is true. */
2350                         if (NFSHASNFSV4N(nmp))
2351                                 *tl++ = 0;
2352                         else
2353                                 *tl++ = dstateid.seqid;
2354                         *tl++ = dstateid.other[0];
2355                         *tl++ = dstateid.other[1];
2356                         *tl++ = dstateid.other[2];
2357                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2358                         np = VTONFS(dvp);
2359                         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2360                             np->n_fhp->nfh_len, 0);
2361                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2362                         *tl = txdr_unsigned(NFSV4OP_REMOVE);
2363                 }
2364         } else {
2365                 ret = 0;
2366         }
2367         if (ret == 0)
2368                 NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp);
2369         (void) nfsm_strtom(nd, name, namelen);
2370         error = nfscl_request(nd, dvp, p, cred, dstuff);
             /* On a request failure, return directly; nd_mrep is not freed here. */
2371         if (error)
2372                 return (error);
2373         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2374                 /* For NFSv4, parse out any Delegreturn replies. */
2375                 if (ret > 0 && nd->nd_repstat != 0 &&
2376                     (nd->nd_flag & ND_NOMOREDATA)) {
2377                         /*
2378                          * If the Delegreturn failed, try again without
2379                          * it. The server will Recall, as required.
2380                          */
2381                         mbuf_freem(nd->nd_mrep);
2382                         goto tryagain;
2383                 }
                     /*
                      * Skip two words of the reply per iteration, (ret * 2)
                      * times. A non-zero second word stops further parsing
                      * by setting ND_NOMOREDATA.
                      */
2384                 for (i = 0; i < (ret * 2); i++) {
2385                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2386                             ND_NFSV4) {
2387                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2388                             if (*(tl + 1))
2389                                 nd->nd_flag |= ND_NOMOREDATA;
2390                         }
2391                 }
2392                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2393         }
             /* A parse error takes precedence over the server's reply status. */
2394         if (nd->nd_repstat && !error)
2395                 error = nd->nd_repstat;
2396 nfsmout:
2397         mbuf_freem(nd->nd_mrep);
2398         return (error);
2399 }
2400
2401 /*
2402  * Do an nfs rename rpc.
      *
      * Renames fnameptr in directory fdvp to tnameptr in directory tdvp.
      * fvp and tvp are passed to nfscl_renamedeleg() to find delegations
      * that are returned ahead of the Rename in the same request on NFSv4
      * mounts. If such a request fails at a delegation return, it is redone
      * as a plain NFSPROC_RENAME.
      * For NFSv3 and NFSv4 the wcc data for the two directories is returned
      * via fnap/*fattrflagp and tnap/*tattrflagp.
      * Returns ENAMETOOLONG if either name exceeds NFS_MAXNAMLEN, the error
      * from nfscl_request() or from parsing the reply, otherwise nd_repstat.
2403  */
2404 APPLESTATIC int
2405 nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
2406     vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
2407     NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
2408     int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff)
2409 {
2410         u_int32_t *tl;
2411         struct nfsrv_descript nfsd, *nd = &nfsd;
2412         struct nfsmount *nmp;
2413         struct nfsnode *np;
2414         nfsattrbit_t attrbits;
2415         nfsv4stateid_t fdstateid, tdstateid;
2416         int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
2417         
2418         *fattrflagp = 0;
2419         *tattrflagp = 0;
2420         nmp = VFSTONFS(vnode_mount(fdvp));
2421         if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
2422                 return (ENAMETOOLONG);
2423 tryagain:
             /*
              * On the first pass ret is 0, so delegations are looked for.
              * After a "goto tryagain" ret is non-zero, so the else clause
              * resets it to 0 and a plain Rename is built instead.
              */
2424         if (NFSHASNFSV4(nmp) && ret == 0) {
2425                 ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
2426                     &tdstateid, &gottd, p);
                     /* Start the request that matches the delegations found. */
2427                 if (gotfd && gottd) {
2428                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp);
2429                 } else if (gotfd) {
2430                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp);
2431                 } else if (gottd) {
2432                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp);
2433                 }
2434                 if (gotfd) {
                             /* The stateid of the delegation found for fvp. */
2435                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2436                         if (NFSHASNFSV4N(nmp))
2437                                 *tl++ = 0;
2438                         else
2439                                 *tl++ = fdstateid.seqid;
2440                         *tl++ = fdstateid.other[0];
2441                         *tl++ = fdstateid.other[1];
2442                         *tl = fdstateid.other[2];
2443                         if (gottd) {
                                     /* Putfh of tvp and a second Delegreturn op. */
2444                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2445                                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2446                                 np = VTONFS(tvp);
2447                                 (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2448                                     np->n_fhp->nfh_len, 0);
2449                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2450                                 *tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
2451                         }
2452                 }
2453                 if (gottd) {
                             /* The stateid of the delegation found for tvp. */
2454                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2455                         if (NFSHASNFSV4N(nmp))
2456                                 *tl++ = 0;
2457                         else
2458                                 *tl++ = tdstateid.seqid;
2459                         *tl++ = tdstateid.other[0];
2460                         *tl++ = tdstateid.other[1];
2461                         *tl = tdstateid.other[2];
2462                 }
2463                 if (ret > 0) {
                             /* Putfh of the from directory, followed by Savefh. */
2464                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2465                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2466                         np = VTONFS(fdvp);
2467                         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2468                             np->n_fhp->nfh_len, 0);
2469                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2470                         *tl = txdr_unsigned(NFSV4OP_SAVEFH);
2471                 }
2472         } else {
2473                 ret = 0;
2474         }
2475         if (ret == 0)
2476                 NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp);
2477         if (nd->nd_flag & ND_NFSV4) {
                     /*
                      * Getattr for the wcc attributes, Putfh of the to
                      * directory, a second Getattr and then the Rename op.
                      */
2478                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2479                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2480                 NFSWCCATTR_ATTRBIT(&attrbits);
2481                 (void) nfsrv_putattrbit(nd, &attrbits);
2482                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2483                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2484                 (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2485                     VTONFS(tdvp)->n_fhp->nfh_len, 0);
2486                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2487                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2488                 (void) nfsrv_putattrbit(nd, &attrbits);
2489                 nd->nd_flag |= ND_V4WCCATTR;
2490                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2491                 *tl = txdr_unsigned(NFSV4OP_RENAME);
2492         }
             /*
              * The from name, then (when not NFSv4) the to directory's file
              * handle, then the to name.
              */
2493         (void) nfsm_strtom(nd, fnameptr, fnamelen);
2494         if (!(nd->nd_flag & ND_NFSV4))
2495                 (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2496                         VTONFS(tdvp)->n_fhp->nfh_len, 0);
2497         (void) nfsm_strtom(nd, tnameptr, tnamelen);
2498         error = nfscl_request(nd, fdvp, p, cred, fstuff);
             /* On a request failure, return directly; nd_mrep is not freed here. */
2499         if (error)
2500                 return (error);
2501         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2502                 /* For NFSv4, parse out any Delegreturn replies. */
2503                 if (ret > 0 && nd->nd_repstat != 0 &&
2504                     (nd->nd_flag & ND_NOMOREDATA)) {
2505                         /*
2506                          * If the Delegreturn failed, try again without
2507                          * it. The server will Recall, as required.
2508                          */
2509                         mbuf_freem(nd->nd_mrep);
2510                         goto tryagain;
2511                 }
                     /*
                      * Skip two words of the reply per iteration, (ret * 2)
                      * times, checking the second word of each pair.
                      */
2512                 for (i = 0; i < (ret * 2); i++) {
2513                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2514                             ND_NFSV4) {
2515                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2516                             if (*(tl + 1)) {
2517                                 if (i == 0 && ret > 1) {
2518                                     /*
2519                                      * If the Delegreturn failed, try again
2520                                      * without it. The server will Recall, as
2521                                      * required.
2522                                      * If ret > 1, the first iteration of this
2523                                      * loop is the second DelegReturn result.
2524                                      */
2525                                     mbuf_freem(nd->nd_mrep);
2526                                     goto tryagain;
2527                                 } else {
2528                                     nd->nd_flag |= ND_NOMOREDATA;
2529                                 }
2530                             }
2531                         }
2532                 }
2533                 /* Now, the first wcc attribute reply. */
2534                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2535                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2536                         if (*(tl + 1))
2537                                 nd->nd_flag |= ND_NOMOREDATA;
2538                 }
2539                 error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL,
2540                     fstuff);
2541                 /* and the second wcc attribute reply. */
2542                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
2543                     !error) {
2544                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2545                         if (*(tl + 1))
2546                                 nd->nd_flag |= ND_NOMOREDATA;
2547                 }
2548                 if (!error)
2549                         error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
2550                             NULL, tstuff);
2551         }
             /* A parse error takes precedence over the server's reply status. */
2552         if (nd->nd_repstat && !error)
2553                 error = nd->nd_repstat;
2554 nfsmout:
2555         mbuf_freem(nd->nd_mrep);
2556         return (error);
2557 }
2558
2559 /*
2560  * nfs hard link create rpc
      *
      * Creates the link "name" in directory dvp that refers to vp. The request
      * is started on vp and carries the file handle of dvp followed by the
      * name; for NFSv4, Putfh, Getattr (wcc attributes) and Link ops are
      * added around those.
      * For NFSv3, the post-op attributes of vp are returned via nap and
      * *attrflagp. The directory's wcc data is returned via dnap and
      * *dattrflagp for NFSv3 and NFSv4.
      * Returns ENAMETOOLONG if namelen exceeds NFS_MAXNAMLEN, the error from
      * nfscl_request() or from parsing the reply, otherwise nd_repstat.
2561  */
2562 APPLESTATIC int
2563 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
2564     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2565     struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff)
2566 {
2567         u_int32_t *tl;
2568         struct nfsrv_descript nfsd, *nd = &nfsd;
2569         nfsattrbit_t attrbits;
2570         int error = 0;
2571
2572         *attrflagp = 0;
2573         *dattrflagp = 0;
2574         if (namelen > NFS_MAXNAMLEN)
2575                 return (ENAMETOOLONG);
2576         NFSCL_REQSTART(nd, NFSPROC_LINK, vp);
             /* For NFSv4, the directory's file handle is the argument of a Putfh. */
2577         if (nd->nd_flag & ND_NFSV4) {
2578                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2579                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2580         }
2581         (void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh,
2582                 VTONFS(dvp)->n_fhp->nfh_len, 0);
2583         if (nd->nd_flag & ND_NFSV4) {
                     /* Getattr for the wcc attributes, then the Link op itself. */
2584                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2585                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2586                 NFSWCCATTR_ATTRBIT(&attrbits);
2587                 (void) nfsrv_putattrbit(nd, &attrbits);
2588                 nd->nd_flag |= ND_V4WCCATTR;
2589                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2590                 *tl = txdr_unsigned(NFSV4OP_LINK);
2591         }
2592         (void) nfsm_strtom(nd, name, namelen);
2593         error = nfscl_request(nd, vp, p, cred, dstuff);
             /* On a request failure, return directly; nd_mrep is not freed here. */
2594         if (error)
2595                 return (error);
2596         if (nd->nd_flag & ND_NFSV3) {
                     /* Post-op attributes of the file, then the directory's wcc data. */
2597                 error = nfscl_postop_attr(nd, nap, attrflagp, dstuff);
2598                 if (!error)
2599                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2600                             NULL, dstuff);
2601         } else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2602                 /*
2603                  * First, parse out the PutFH and Getattr result.
                      * The second pair of words is only dissected when the
                      * second word of the first pair is zero. A non-zero
                      * second word in the last pair read sets ND_NOMOREDATA.
2604                  */
2605                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2606                 if (!(*(tl + 1)))
2607                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2608                 if (*(tl + 1))
2609                         nd->nd_flag |= ND_NOMOREDATA;
2610                 /*
2611                  * Get the pre-op attributes.
2612                  */
2613                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2614         }
             /* A parse error takes precedence over the server's reply status. */
2615         if (nd->nd_repstat && !error)
2616                 error = nd->nd_repstat;
2617 nfsmout:
2618         mbuf_freem(nd->nd_mrep);
2619         return (error);
2620 }
2621
2622 /*
2623  * nfs symbolic link create rpc
      *
      * Creates the symbolic link "name" in directory dvp that points at the
      * NUL terminated string target. The order of the arguments depends on
      * the protocol version:
      *  - NFSv4: type (NFLNK), target, name, attributes
      *  - NFSv3: name, attributes, target
      *  - NFSv2: name, target, attributes (with NFSSATTR_SIZENEG1)
      * *nfhpp, *attrflagp and *dattrflagp are cleared on entry. Only an NFSv3
      * reply is parsed for the new file handle and attributes; the
      * directory's wcc data is parsed for NFSv3 and NFSv4.
      * Returns ENAMETOOLONG if the target exceeds NFS_MAXPATHLEN or namelen
      * exceeds NFS_MAXNAMLEN, the error from nfscl_request() or from parsing
      * the reply, otherwise nd_repstat, subject to the EEXIST mapping done
      * at the end of the function.
2624  */
2625 APPLESTATIC int
2626 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, char *target,
2627     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2628     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2629     int *dattrflagp, void *dstuff)
2630 {
2631         u_int32_t *tl;
2632         struct nfsrv_descript nfsd, *nd = &nfsd;
2633         struct nfsmount *nmp;
2634         int slen, error = 0;
2635
2636         *nfhpp = NULL;
2637         *attrflagp = 0;
2638         *dattrflagp = 0;
2639         nmp = VFSTONFS(vnode_mount(dvp));
2640         slen = strlen(target);
2641         if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
2642                 return (ENAMETOOLONG);
2643         NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp);
             /* For NFSv4, the object type and the target come before the name. */
2644         if (nd->nd_flag & ND_NFSV4) {
2645                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2646                 *tl = txdr_unsigned(NFLNK);
2647                 (void) nfsm_strtom(nd, target, slen);
2648         }
2649         (void) nfsm_strtom(nd, name, namelen);
2650         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2651                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
             /* When not NFSv4, the target follows the name (and the NFSv3 attributes). */
2652         if (!(nd->nd_flag & ND_NFSV4))
2653                 (void) nfsm_strtom(nd, target, slen);
2654         if (nd->nd_flag & ND_NFSV2)
2655                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2656         error = nfscl_request(nd, dvp, p, cred, dstuff);
             /* On a request failure, return directly; nd_mrep is not freed here. */
2657         if (error)
2658                 return (error);
2659         if (nd->nd_flag & ND_NFSV4)
2660                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2661         if ((nd->nd_flag & ND_NFSV3) && !error) {
                     /* The file handle and attributes are only parsed on success. */
2662                 if (!nd->nd_repstat)
2663                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2664                 if (!error)
2665                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2666                             NULL, dstuff);
2667         }
             /* A parse error takes precedence over the server's reply status. */
2668         if (nd->nd_repstat && !error)
2669                 error = nd->nd_repstat;
2670         mbuf_freem(nd->nd_mrep);
2671         /*
2672          * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2673          * Only do this if vfs.nfs.ignore_eexist is set.
2674          * Never do this for NFSv4.1 or later minor versions, since sessions
2675          * should guarantee "exactly once" RPC semantics.
2676          */
2677         if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2678             nmp->nm_minorvers == 0))
2679                 error = 0;
2680         return (error);
2681 }
2682
2683 /*
2684  * nfs make dir rpc
2685  */
2686 APPLESTATIC int
2687 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2688     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2689     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2690     int *dattrflagp, void *dstuff)
2691 {
2692         u_int32_t *tl;
2693         struct nfsrv_descript nfsd, *nd = &nfsd;
2694         nfsattrbit_t attrbits;
2695         int error = 0;
2696         struct nfsfh *fhp;
2697         struct nfsmount *nmp;
2698
2699         *nfhpp = NULL;
2700         *attrflagp = 0;
2701         *dattrflagp = 0;
2702         nmp = VFSTONFS(vnode_mount(dvp));
2703         fhp = VTONFS(dvp)->n_fhp;
2704         if (namelen > NFS_MAXNAMLEN)
2705                 return (ENAMETOOLONG);
2706         NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp);
2707         if (nd->nd_flag & ND_NFSV4) {
2708                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2709                 *tl = txdr_unsigned(NFDIR);
2710         }
2711         (void) nfsm_strtom(nd, name, namelen);
2712         nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2713         if (nd->nd_flag & ND_NFSV4) {
2714                 NFSGETATTR_ATTRBIT(&attrbits);
2715                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2716                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2717                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2718                 (void) nfsrv_putattrbit(nd, &attrbits);
2719                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2720                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2721                 (void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0);
2722                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2723                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2724                 (void) nfsrv_putattrbit(nd, &attrbits);
2725         }
2726         error = nfscl_request(nd, dvp, p, cred, dstuff);
2727         if (error)
2728                 return (error);
2729         if (nd->nd_flag & ND_NFSV4)
2730                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2731         if (!nd->nd_repstat && !error) {
2732                 if (nd->nd_flag & ND_NFSV4) {
2733                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2734                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2735                 }
2736                 if (!error)
2737                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2738                 if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
2739                         /* Get rid of the PutFH and Getattr status values. */
2740                         NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2741                         /* Load the directory attributes. */
2742                         error = nfsm_loadattr(nd, dnap);
2743                         if (error == 0)
2744                                 *dattrflagp = 1;
2745                 }
2746         }
2747         if ((nd->nd_flag & ND_NFSV3) && !error)
2748                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2749         if (nd->nd_repstat && !error)
2750                 error = nd->nd_repstat;
2751 nfsmout:
2752         mbuf_freem(nd->nd_mrep);
2753         /*
2754          * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2755          * Only do this if vfs.nfs.ignore_eexist is set.
2756          * Never do this for NFSv4.1 or later minor versions, since sessions
2757          * should guarantee "exactly once" RPC semantics.
2758          */
2759         if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2760             nmp->nm_minorvers == 0))
2761                 error = 0;
2762         return (error);
2763 }
2764
2765 /*
2766  * nfs remove directory call
2767  */
2768 APPLESTATIC int
2769 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
2770     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff)
2771 {
2772         struct nfsrv_descript nfsd, *nd = &nfsd;
2773         int error = 0;
2774
2775         *dattrflagp = 0;
2776         if (namelen > NFS_MAXNAMLEN)
2777                 return (ENAMETOOLONG);
2778         NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp);
2779         (void) nfsm_strtom(nd, name, namelen);
2780         error = nfscl_request(nd, dvp, p, cred, dstuff);
2781         if (error)
2782                 return (error);
2783         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2784                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2785         if (nd->nd_repstat && !error)
2786                 error = nd->nd_repstat;
2787         mbuf_freem(nd->nd_mrep);
2788         /*
2789          * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2790          */
2791         if (error == ENOENT)
2792                 error = 0;
2793         return (error);
2794 }
2795
2796 /*
2797  * Readdir rpc.
2798  * Always returns with either uio_resid unchanged, if you are at the
2799  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
2800  * filled in.
2801  * I felt this would allow caching of directory blocks more easily
2802  * than returning a pertially filled block.
2803  * Directory offset cookies:
2804  * Oh my, what to do with them...
2805  * I can think of three ways to deal with them:
2806  * 1 - have the layer above these RPCs maintain a map between logical
2807  *     directory byte offsets and the NFS directory offset cookies
2808  * 2 - pass the opaque directory offset cookies up into userland
2809  *     and let the libc functions deal with them, via the system call
2810  * 3 - return them to userland in the "struct dirent", so future versions
2811  *     of libc can use them and do whatever is necessary to make things work
2812  *     above these rpc calls, in the meantime
2813  * For now, I do #3 by "hiding" the directory offset cookies after the
2814  * d_name field in struct dirent. This is space inside d_reclen that
2815  * will be ignored by anything that doesn't know about them.
2816  * The directory offset cookies are filled in as the last 8 bytes of
2817  * each directory entry, after d_name. Someday, the userland libc
2818  * functions may be able to use these. In the meantime, it satisfies
2819  * OpenBSD's requirements for cookies being returned.
2820  * If expects the directory offset cookie for the read to be in uio_offset
2821  * and returns the one for the next entry after this directory block in
2822  * there, as well.
2823  */
2824 APPLESTATIC int
2825 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
2826     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
2827     int *eofp, void *stuff)
2828 {
2829         int len, left;
2830         struct dirent *dp = NULL;
2831         u_int32_t *tl;
2832         nfsquad_t cookie, ncookie;
2833         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
2834         struct nfsnode *dnp = VTONFS(vp);
2835         struct nfsvattr nfsva;
2836         struct nfsrv_descript nfsd, *nd = &nfsd;
2837         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2838         int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
2839         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
2840         char *cp;
2841         nfsattrbit_t attrbits, dattrbits;
2842         u_int32_t rderr, *tl2 = NULL;
2843         size_t tresid;
2844
2845         KASSERT(uiop->uio_iovcnt == 1 &&
2846             (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
2847             ("nfs readdirrpc bad uio"));
2848         ncookie.lval[0] = ncookie.lval[1] = 0;
2849         /*
2850          * There is no point in reading a lot more than uio_resid, however
2851          * adding one additional DIRBLKSIZ makes sense. Since uio_resid
2852          * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
2853          * will never make readsize > nm_readdirsize.
2854          */
2855         readsize = nmp->nm_readdirsize;
2856         if (readsize > uio_uio_resid(uiop))
2857                 readsize = uio_uio_resid(uiop) + DIRBLKSIZ;
2858
2859         *attrflagp = 0;
2860         if (eofp)
2861                 *eofp = 0;
2862         tresid = uio_uio_resid(uiop);
2863         cookie.lval[0] = cookiep->nfsuquad[0];
2864         cookie.lval[1] = cookiep->nfsuquad[1];
2865         nd->nd_mrep = NULL;
2866
2867         /*
2868          * For NFSv4, first create the "." and ".." entries.
2869          */
2870         if (NFSHASNFSV4(nmp)) {
2871                 reqsize = 6 * NFSX_UNSIGNED;
2872                 NFSGETATTR_ATTRBIT(&dattrbits);
2873                 NFSZERO_ATTRBIT(&attrbits);
2874                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
2875                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
2876                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
2877                     NFSATTRBIT_MOUNTEDONFILEID)) {
2878                         NFSSETBIT_ATTRBIT(&attrbits,
2879                             NFSATTRBIT_MOUNTEDONFILEID);
2880                         gotmnton = 1;
2881                 } else {
2882                         /*
2883                          * Must fake it. Use the fileno, except when the
2884                          * fsid is != to that of the directory. For that
2885                          * case, generate a fake fileno that is not the same.
2886                          */
2887                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
2888                         gotmnton = 0;
2889                 }
2890
2891                 /*
2892                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
2893                  */
2894                 if (uiop->uio_offset == 0) {
2895                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
2896                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2897                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2898                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2899                         (void) nfsrv_putattrbit(nd, &attrbits);
2900                         error = nfscl_request(nd, vp, p, cred, stuff);
2901                         if (error)
2902                             return (error);
2903                         dotfileid = 0;  /* Fake out the compiler. */
2904                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
2905                             error = nfsm_loadattr(nd, &nfsva);
2906                             if (error != 0)
2907                                 goto nfsmout;
2908                             dotfileid = nfsva.na_fileid;
2909                         }
2910                         if (nd->nd_repstat == 0) {
2911                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2912                             len = fxdr_unsigned(int, *(tl + 4));
2913                             if (len > 0 && len <= NFSX_V4FHMAX)
2914                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
2915                             else
2916                                 error = EPERM;
2917                             if (!error) {
2918                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
2919                                 nfsva.na_mntonfileno = UINT64_MAX;
2920                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
2921                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
2922                                     NULL, NULL, NULL, p, cred);
2923                                 if (error) {
2924                                     dotdotfileid = dotfileid;
2925                                 } else if (gotmnton) {
2926                                     if (nfsva.na_mntonfileno != UINT64_MAX)
2927                                         dotdotfileid = nfsva.na_mntonfileno;
2928                                     else
2929                                         dotdotfileid = nfsva.na_fileid;
2930                                 } else if (nfsva.na_filesid[0] ==
2931                                     dnp->n_vattr.na_filesid[0] &&
2932                                     nfsva.na_filesid[1] ==
2933                                     dnp->n_vattr.na_filesid[1]) {
2934                                     dotdotfileid = nfsva.na_fileid;
2935                                 } else {
2936                                     do {
2937                                         fakefileno--;
2938                                     } while (fakefileno ==
2939                                         nfsva.na_fileid);
2940                                     dotdotfileid = fakefileno;
2941                                 }
2942                             }
2943                         } else if (nd->nd_repstat == NFSERR_NOENT) {
2944                             /*
2945                              * Lookupp returns NFSERR_NOENT when we are
2946                              * at the root, so just use the current dir.
2947                              */
2948                             nd->nd_repstat = 0;
2949                             dotdotfileid = dotfileid;
2950                         } else {
2951                             error = nd->nd_repstat;
2952                         }
2953                         mbuf_freem(nd->nd_mrep);
2954                         if (error)
2955                             return (error);
2956                         nd->nd_mrep = NULL;
2957                         dp = (struct dirent *)uio_iov_base(uiop);
2958                         dp->d_off = 0;
2959                         dp->d_type = DT_DIR;
2960                         dp->d_fileno = dotfileid;
2961                         dp->d_namlen = 1;
2962                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
2963                         dp->d_name[0] = '.';
2964                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
2965                         /*
2966                          * Just make these offset cookie 0.
2967                          */
2968                         tl = (u_int32_t *)&dp->d_name[8];
2969                         *tl++ = 0;
2970                         *tl = 0;
2971                         blksiz += dp->d_reclen;
2972                         uio_uio_resid_add(uiop, -(dp->d_reclen));
2973                         uiop->uio_offset += dp->d_reclen;
2974                         uio_iov_base_add(uiop, dp->d_reclen);
2975                         uio_iov_len_add(uiop, -(dp->d_reclen));
2976                         dp = (struct dirent *)uio_iov_base(uiop);
2977                         dp->d_off = 0;
2978                         dp->d_type = DT_DIR;
2979                         dp->d_fileno = dotdotfileid;
2980                         dp->d_namlen = 2;
2981                         *((uint64_t *)dp->d_name) = 0;
2982                         dp->d_name[0] = '.';
2983                         dp->d_name[1] = '.';
2984                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
2985                         /*
2986                          * Just make these offset cookie 0.
2987                          */
2988                         tl = (u_int32_t *)&dp->d_name[8];
2989                         *tl++ = 0;
2990                         *tl = 0;
2991                         blksiz += dp->d_reclen;
2992                         uio_uio_resid_add(uiop, -(dp->d_reclen));
2993                         uiop->uio_offset += dp->d_reclen;
2994                         uio_iov_base_add(uiop, dp->d_reclen);
2995                         uio_iov_len_add(uiop, -(dp->d_reclen));
2996                 }
2997                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
2998         } else {
2999                 reqsize = 5 * NFSX_UNSIGNED;
3000         }
3001
3002
3003         /*
3004          * Loop around doing readdir rpc's of size readsize.
3005          * The stopping criteria is EOF or buffer full.
3006          */
3007         while (more_dirs && bigenough) {
3008                 *attrflagp = 0;
3009                 NFSCL_REQSTART(nd, NFSPROC_READDIR, vp);
3010                 if (nd->nd_flag & ND_NFSV2) {
3011                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3012                         *tl++ = cookie.lval[1];
3013                         *tl = txdr_unsigned(readsize);
3014                 } else {
3015                         NFSM_BUILD(tl, u_int32_t *, reqsize);
3016                         *tl++ = cookie.lval[0];
3017                         *tl++ = cookie.lval[1];
3018                         if (cookie.qval == 0) {
3019                                 *tl++ = 0;
3020                                 *tl++ = 0;
3021                         } else {
3022                                 NFSLOCKNODE(dnp);
3023                                 *tl++ = dnp->n_cookieverf.nfsuquad[0];
3024                                 *tl++ = dnp->n_cookieverf.nfsuquad[1];
3025                                 NFSUNLOCKNODE(dnp);
3026                         }
3027                         if (nd->nd_flag & ND_NFSV4) {
3028                                 *tl++ = txdr_unsigned(readsize);
3029                                 *tl = txdr_unsigned(readsize);
3030                                 (void) nfsrv_putattrbit(nd, &attrbits);
3031                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3032                                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3033                                 (void) nfsrv_putattrbit(nd, &dattrbits);
3034                         } else {
3035                                 *tl = txdr_unsigned(readsize);
3036                         }
3037                 }
3038                 error = nfscl_request(nd, vp, p, cred, stuff);
3039                 if (error)
3040                         return (error);
3041                 if (!(nd->nd_flag & ND_NFSV2)) {
3042                         if (nd->nd_flag & ND_NFSV3)
3043                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3044                                     stuff);
3045                         if (!nd->nd_repstat && !error) {
3046                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
3047                                 NFSLOCKNODE(dnp);
3048                                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3049                                 dnp->n_cookieverf.nfsuquad[1] = *tl;
3050                                 NFSUNLOCKNODE(dnp);
3051                         }
3052                 }
3053                 if (nd->nd_repstat || error) {
3054                         if (!error)
3055                                 error = nd->nd_repstat;
3056                         goto nfsmout;
3057                 }
3058                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3059                 more_dirs = fxdr_unsigned(int, *tl);
3060                 if (!more_dirs)
3061                         tryformoredirs = 0;
3062         
3063                 /* loop through the dir entries, doctoring them to 4bsd form */
3064                 while (more_dirs && bigenough) {
3065                         if (nd->nd_flag & ND_NFSV4) {
3066                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3067                                 ncookie.lval[0] = *tl++;
3068                                 ncookie.lval[1] = *tl++;
3069                                 len = fxdr_unsigned(int, *tl);
3070                         } else if (nd->nd_flag & ND_NFSV3) {
3071                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3072                                 nfsva.na_fileid = fxdr_hyper(tl);
3073                                 tl += 2;
3074                                 len = fxdr_unsigned(int, *tl);
3075                         } else {
3076                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3077                                 nfsva.na_fileid = fxdr_unsigned(uint64_t,
3078                                     *tl++);
3079                                 len = fxdr_unsigned(int, *tl);
3080                         }
3081                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3082                                 error = EBADRPC;
3083                                 goto nfsmout;
3084                         }
3085                         tlen = roundup2(len, 8);
3086                         if (tlen == len)
3087                                 tlen += 8;  /* To ensure null termination. */
3088                         left = DIRBLKSIZ - blksiz;
3089                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3090                                 dp->d_reclen += left;
3091                                 uio_iov_base_add(uiop, left);
3092                                 uio_iov_len_add(uiop, -(left));
3093                                 uio_uio_resid_add(uiop, -(left));
3094                                 uiop->uio_offset += left;
3095                                 blksiz = 0;
3096                         }
3097                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3098                             uio_uio_resid(uiop))
3099                                 bigenough = 0;
3100                         if (bigenough) {
3101                                 dp = (struct dirent *)uio_iov_base(uiop);
3102                                 dp->d_off = 0;
3103                                 dp->d_namlen = len;
3104                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
3105                                     NFSX_HYPER;
3106                                 dp->d_type = DT_UNKNOWN;
3107                                 blksiz += dp->d_reclen;
3108                                 if (blksiz == DIRBLKSIZ)
3109                                         blksiz = 0;
3110                                 uio_uio_resid_add(uiop, -(DIRHDSIZ));
3111                                 uiop->uio_offset += DIRHDSIZ;
3112                                 uio_iov_base_add(uiop, DIRHDSIZ);
3113                                 uio_iov_len_add(uiop, -(DIRHDSIZ));
3114                                 error = nfsm_mbufuio(nd, uiop, len);
3115                                 if (error)
3116                                         goto nfsmout;
3117                                 cp = uio_iov_base(uiop);
3118                                 tlen -= len;
3119                                 *cp = '\0';     /* null terminate */
3120                                 cp += tlen;     /* points to cookie storage */
3121                                 tl2 = (u_int32_t *)cp;
3122                                 uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
3123                                 uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
3124                                 uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
3125                                 uiop->uio_offset += (tlen + NFSX_HYPER);
3126                         } else {
3127                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3128                                 if (error)
3129                                         goto nfsmout;
3130                         }
3131                         if (nd->nd_flag & ND_NFSV4) {
3132                                 rderr = 0;
3133                                 nfsva.na_mntonfileno = UINT64_MAX;
3134                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3135                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3136                                     NULL, NULL, &rderr, p, cred);
3137                                 if (error)
3138                                         goto nfsmout;
3139                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3140                         } else if (nd->nd_flag & ND_NFSV3) {
3141                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3142                                 ncookie.lval[0] = *tl++;
3143                                 ncookie.lval[1] = *tl++;
3144                         } else {
3145                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3146                                 ncookie.lval[0] = 0;
3147                                 ncookie.lval[1] = *tl++;
3148                         }
3149                         if (bigenough) {
3150                             if (nd->nd_flag & ND_NFSV4) {
3151                                 if (rderr) {
3152                                     dp->d_fileno = 0;
3153                                 } else {
3154                                     if (gotmnton) {
3155                                         if (nfsva.na_mntonfileno != UINT64_MAX)
3156                                             dp->d_fileno = nfsva.na_mntonfileno;
3157                                         else
3158                                             dp->d_fileno = nfsva.na_fileid;
3159                                     } else if (nfsva.na_filesid[0] ==
3160                                         dnp->n_vattr.na_filesid[0] &&
3161                                         nfsva.na_filesid[1] ==
3162                                         dnp->n_vattr.na_filesid[1]) {
3163                                         dp->d_fileno = nfsva.na_fileid;
3164                                     } else {
3165                                         do {
3166                                             fakefileno--;
3167                                         } while (fakefileno ==
3168                                             nfsva.na_fileid);
3169                                         dp->d_fileno = fakefileno;
3170                                     }
3171                                     dp->d_type = vtonfs_dtype(nfsva.na_type);
3172                                 }
3173                             } else {
3174                                 dp->d_fileno = nfsva.na_fileid;
3175                             }
3176                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3177                                 ncookie.lval[0];
3178                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3179                                 ncookie.lval[1];
3180                         }
3181                         more_dirs = fxdr_unsigned(int, *tl);
3182                 }
3183                 /*
3184                  * If at end of rpc data, get the eof boolean
3185                  */
3186                 if (!more_dirs) {
3187                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3188                         eof = fxdr_unsigned(int, *tl);
3189                         if (tryformoredirs)
3190                                 more_dirs = !eof;
3191                         if (nd->nd_flag & ND_NFSV4) {
3192                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3193                                     stuff);
3194                                 if (error)
3195                                         goto nfsmout;
3196                         }
3197                 }
3198                 mbuf_freem(nd->nd_mrep);
3199                 nd->nd_mrep = NULL;
3200         }
3201         /*
3202          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3203          * by increasing d_reclen for the last record.
3204          */
3205         if (blksiz > 0) {
3206                 left = DIRBLKSIZ - blksiz;
3207                 dp->d_reclen += left;
3208                 uio_iov_base_add(uiop, left);
3209                 uio_iov_len_add(uiop, -(left));
3210                 uio_uio_resid_add(uiop, -(left));
3211                 uiop->uio_offset += left;
3212         }
3213
3214         /*
3215          * If returning no data, assume end of file.
3216          * If not bigenough, return not end of file, since you aren't
3217          *    returning all the data
3218          * Otherwise, return the eof flag from the server.
3219          */
3220         if (eofp) {
3221                 if (tresid == ((size_t)(uio_uio_resid(uiop))))
3222                         *eofp = 1;
3223                 else if (!bigenough)
3224                         *eofp = 0;
3225                 else
3226                         *eofp = eof;
3227         }
3228
3229         /*
3230          * Add extra empty records to any remaining DIRBLKSIZ chunks.
3231          */
3232         while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
3233                 dp = (struct dirent *)uio_iov_base(uiop);
3234                 dp->d_type = DT_UNKNOWN;
3235                 dp->d_fileno = 0;
3236                 dp->d_namlen = 0;
3237                 dp->d_name[0] = '\0';
3238                 tl = (u_int32_t *)&dp->d_name[4];
3239                 *tl++ = cookie.lval[0];
3240                 *tl = cookie.lval[1];
3241                 dp->d_reclen = DIRBLKSIZ;
3242                 uio_iov_base_add(uiop, DIRBLKSIZ);
3243                 uio_iov_len_add(uiop, -(DIRBLKSIZ));
3244                 uio_uio_resid_add(uiop, -(DIRBLKSIZ));
3245                 uiop->uio_offset += DIRBLKSIZ;
3246         }
3247
3248 nfsmout:
3249         if (nd->nd_mrep != NULL)
3250                 mbuf_freem(nd->nd_mrep);
3251         return (error);
3252 }
3253
3254 #ifndef APPLE
3255 /*
3256  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3257  * (Also used for NFS V4 when mount flag set.)
3258  * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
3259  */
3260 APPLESTATIC int
3261 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3262     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3263     int *eofp, void *stuff)
3264 {
3265         int len, left;
3266         struct dirent *dp = NULL;
3267         u_int32_t *tl;
3268         vnode_t newvp = NULLVP;
3269         struct nfsrv_descript nfsd, *nd = &nfsd;
3270         struct nameidata nami, *ndp = &nami;
3271         struct componentname *cnp = &ndp->ni_cnd;
3272         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3273         struct nfsnode *dnp = VTONFS(vp), *np;
3274         struct nfsvattr nfsva;
3275         struct nfsfh *nfhp;
3276         nfsquad_t cookie, ncookie;
3277         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3278         int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3279         int isdotdot = 0, unlocknewvp = 0;
3280         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3281         u_int64_t fileno = 0;
3282         char *cp;
3283         nfsattrbit_t attrbits, dattrbits;
3284         size_t tresid;
3285         u_int32_t *tl2 = NULL, rderr;
3286         struct timespec dctime;
3287
3288         KASSERT(uiop->uio_iovcnt == 1 &&
3289             (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
3290             ("nfs readdirplusrpc bad uio"));
3291         ncookie.lval[0] = ncookie.lval[1] = 0;
3292         timespecclear(&dctime);
3293         *attrflagp = 0;
3294         if (eofp != NULL)
3295                 *eofp = 0;
3296         ndp->ni_dvp = vp;
3297         nd->nd_mrep = NULL;
3298         cookie.lval[0] = cookiep->nfsuquad[0];
3299         cookie.lval[1] = cookiep->nfsuquad[1];
3300         tresid = uio_uio_resid(uiop);
3301
3302         /*
3303          * For NFSv4, first create the "." and ".." entries.
3304          */
3305         if (NFSHASNFSV4(nmp)) {
3306                 NFSGETATTR_ATTRBIT(&dattrbits);
3307                 NFSZERO_ATTRBIT(&attrbits);
3308                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3309                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3310                     NFSATTRBIT_MOUNTEDONFILEID)) {
3311                         NFSSETBIT_ATTRBIT(&attrbits,
3312                             NFSATTRBIT_MOUNTEDONFILEID);
3313                         gotmnton = 1;
3314                 } else {
3315                         /*
3316                          * Must fake it. Use the fileno, except when the
3317                          * fsid is != to that of the directory. For that
3318                          * case, generate a fake fileno that is not the same.
3319                          */
3320                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3321                         gotmnton = 0;
3322                 }
3323
3324                 /*
3325                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3326                  */
3327                 if (uiop->uio_offset == 0) {
3328                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
3329                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3330                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
3331                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3332                         (void) nfsrv_putattrbit(nd, &attrbits);
3333                         error = nfscl_request(nd, vp, p, cred, stuff);
3334                         if (error)
3335                             return (error);
3336                         dotfileid = 0;  /* Fake out the compiler. */
3337                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3338                             error = nfsm_loadattr(nd, &nfsva);
3339                             if (error != 0)
3340                                 goto nfsmout;
3341                             dctime = nfsva.na_ctime;
3342                             dotfileid = nfsva.na_fileid;
3343                         }
3344                         if (nd->nd_repstat == 0) {
3345                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3346                             len = fxdr_unsigned(int, *(tl + 4));
3347                             if (len > 0 && len <= NFSX_V4FHMAX)
3348                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3349                             else
3350                                 error = EPERM;
3351                             if (!error) {
3352                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3353                                 nfsva.na_mntonfileno = UINT64_MAX;
3354                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3355                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3356                                     NULL, NULL, NULL, p, cred);
3357                                 if (error) {
3358                                     dotdotfileid = dotfileid;
3359                                 } else if (gotmnton) {
3360                                     if (nfsva.na_mntonfileno != UINT64_MAX)
3361                                         dotdotfileid = nfsva.na_mntonfileno;
3362                                     else
3363                                         dotdotfileid = nfsva.na_fileid;
3364                                 } else if (nfsva.na_filesid[0] ==
3365                                     dnp->n_vattr.na_filesid[0] &&
3366                                     nfsva.na_filesid[1] ==
3367                                     dnp->n_vattr.na_filesid[1]) {
3368                                     dotdotfileid = nfsva.na_fileid;
3369                                 } else {
3370                                     do {
3371                                         fakefileno--;
3372                                     } while (fakefileno ==
3373                                         nfsva.na_fileid);
3374                                     dotdotfileid = fakefileno;
3375                                 }
3376                             }
3377                         } else if (nd->nd_repstat == NFSERR_NOENT) {
3378                             /*
3379                              * Lookupp returns NFSERR_NOENT when we are
3380                              * at the root, so just use the current dir.
3381                              */
3382                             nd->nd_repstat = 0;
3383                             dotdotfileid = dotfileid;
3384                         } else {
3385                             error = nd->nd_repstat;
3386                         }
3387                         mbuf_freem(nd->nd_mrep);
3388                         if (error)
3389                             return (error);
3390                         nd->nd_mrep = NULL;
3391                         dp = (struct dirent *)uio_iov_base(uiop);
3392                         dp->d_off = 0;
3393                         dp->d_type = DT_DIR;
3394                         dp->d_fileno = dotfileid;
3395                         dp->d_namlen = 1;
3396                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
3397                         dp->d_name[0] = '.';
3398                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3399                         /*
3400                          * Just make these offset cookie 0.
3401                          */
3402                         tl = (u_int32_t *)&dp->d_name[8];
3403                         *tl++ = 0;
3404                         *tl = 0;
3405                         blksiz += dp->d_reclen;
3406                         uio_uio_resid_add(uiop, -(dp->d_reclen));
3407                         uiop->uio_offset += dp->d_reclen;
3408                         uio_iov_base_add(uiop, dp->d_reclen);
3409                         uio_iov_len_add(uiop, -(dp->d_reclen));
3410                         dp = (struct dirent *)uio_iov_base(uiop);
3411                         dp->d_off = 0;
3412                         dp->d_type = DT_DIR;
3413                         dp->d_fileno = dotdotfileid;
3414                         dp->d_namlen = 2;
3415                         *((uint64_t *)dp->d_name) = 0;
3416                         dp->d_name[0] = '.';
3417                         dp->d_name[1] = '.';
3418                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3419                         /*
3420                          * Just make these offset cookie 0.
3421                          */
3422                         tl = (u_int32_t *)&dp->d_name[8];
3423                         *tl++ = 0;
3424                         *tl = 0;
3425                         blksiz += dp->d_reclen;
3426                         uio_uio_resid_add(uiop, -(dp->d_reclen));
3427                         uiop->uio_offset += dp->d_reclen;
3428                         uio_iov_base_add(uiop, dp->d_reclen);
3429                         uio_iov_len_add(uiop, -(dp->d_reclen));
3430                 }
3431                 NFSREADDIRPLUS_ATTRBIT(&attrbits);
3432                 if (gotmnton)
3433                         NFSSETBIT_ATTRBIT(&attrbits,
3434                             NFSATTRBIT_MOUNTEDONFILEID);
3435         }
3436
3437         /*
3438          * Loop around doing readdir rpc's of size nm_readdirsize.
3439          * The stopping criteria is EOF or buffer full.
3440          */
3441         while (more_dirs && bigenough) {
3442                 *attrflagp = 0;
3443                 NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp);
3444                 NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3445                 *tl++ = cookie.lval[0];
3446                 *tl++ = cookie.lval[1];
3447                 if (cookie.qval == 0) {
3448                         *tl++ = 0;
3449                         *tl++ = 0;
3450                 } else {
3451                         NFSLOCKNODE(dnp);
3452                         *tl++ = dnp->n_cookieverf.nfsuquad[0];
3453                         *tl++ = dnp->n_cookieverf.nfsuquad[1];
3454                         NFSUNLOCKNODE(dnp);
3455                 }
3456                 *tl++ = txdr_unsigned(nmp->nm_readdirsize);
3457                 *tl = txdr_unsigned(nmp->nm_readdirsize);
3458                 if (nd->nd_flag & ND_NFSV4) {
3459                         (void) nfsrv_putattrbit(nd, &attrbits);
3460                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3461                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3462                         (void) nfsrv_putattrbit(nd, &dattrbits);
3463                 }
3464                 error = nfscl_request(nd, vp, p, cred, stuff);
3465                 if (error)
3466                         return (error);
3467                 if (nd->nd_flag & ND_NFSV3)
3468                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3469                 if (nd->nd_repstat || error) {
3470                         if (!error)
3471                                 error = nd->nd_repstat;
3472                         goto nfsmout;
3473                 }
3474                 if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3475                         dctime = nap->na_ctime;
3476                 NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3477                 NFSLOCKNODE(dnp);
3478                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3479                 dnp->n_cookieverf.nfsuquad[1] = *tl++;
3480                 NFSUNLOCKNODE(dnp);
3481                 more_dirs = fxdr_unsigned(int, *tl);
3482                 if (!more_dirs)
3483                         tryformoredirs = 0;
3484         
3485                 /* loop through the dir entries, doctoring them to 4bsd form */
3486                 while (more_dirs && bigenough) {
3487                         NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3488                         if (nd->nd_flag & ND_NFSV4) {
3489                                 ncookie.lval[0] = *tl++;
3490                                 ncookie.lval[1] = *tl++;
3491                         } else {
3492                                 fileno = fxdr_hyper(tl);
3493                                 tl += 2;
3494                         }
3495                         len = fxdr_unsigned(int, *tl);
3496                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3497                                 error = EBADRPC;
3498                                 goto nfsmout;
3499                         }
3500                         tlen = roundup2(len, 8);
3501                         if (tlen == len)
3502                                 tlen += 8;  /* To ensure null termination. */
3503                         left = DIRBLKSIZ - blksiz;
3504                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3505                                 dp->d_reclen += left;
3506                                 uio_iov_base_add(uiop, left);
3507                                 uio_iov_len_add(uiop, -(left));
3508                                 uio_uio_resid_add(uiop, -(left));
3509                                 uiop->uio_offset += left;
3510                                 blksiz = 0;
3511                         }
3512                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3513                             uio_uio_resid(uiop))
3514                                 bigenough = 0;
3515                         if (bigenough) {
3516                                 dp = (struct dirent *)uio_iov_base(uiop);
3517                                 dp->d_off = 0;
3518                                 dp->d_namlen = len;
3519                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
3520                                     NFSX_HYPER;
3521                                 dp->d_type = DT_UNKNOWN;
3522                                 blksiz += dp->d_reclen;
3523                                 if (blksiz == DIRBLKSIZ)
3524                                         blksiz = 0;
3525                                 uio_uio_resid_add(uiop, -(DIRHDSIZ));
3526                                 uiop->uio_offset += DIRHDSIZ;
3527                                 uio_iov_base_add(uiop, DIRHDSIZ);
3528                                 uio_iov_len_add(uiop, -(DIRHDSIZ));
3529                                 cnp->cn_nameptr = uio_iov_base(uiop);
3530                                 cnp->cn_namelen = len;
3531                                 NFSCNHASHZERO(cnp);
3532                                 error = nfsm_mbufuio(nd, uiop, len);
3533                                 if (error)
3534                                         goto nfsmout;
3535                                 cp = uio_iov_base(uiop);
3536                                 tlen -= len;
3537                                 *cp = '\0';
3538                                 cp += tlen;     /* points to cookie storage */
3539                                 tl2 = (u_int32_t *)cp;
3540                                 if (len == 2 && cnp->cn_nameptr[0] == '.' &&
3541                                     cnp->cn_nameptr[1] == '.')
3542                                         isdotdot = 1;
3543                                 else
3544                                         isdotdot = 0;
3545                                 uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
3546                                 uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
3547                                 uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
3548                                 uiop->uio_offset += (tlen + NFSX_HYPER);
3549                         } else {
3550                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3551                                 if (error)
3552                                         goto nfsmout;
3553                         }
3554                         nfhp = NULL;
3555                         if (nd->nd_flag & ND_NFSV3) {
3556                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3557                                 ncookie.lval[0] = *tl++;
3558                                 ncookie.lval[1] = *tl++;
3559                                 attrflag = fxdr_unsigned(int, *tl);
3560                                 if (attrflag) {
3561                                   error = nfsm_loadattr(nd, &nfsva);
3562                                   if (error)
3563                                         goto nfsmout;
3564                                 }
3565                                 NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
3566                                 if (*tl) {
3567                                         error = nfsm_getfh(nd, &nfhp);
3568                                         if (error)
3569                                             goto nfsmout;
3570                                 }
3571                                 if (!attrflag && nfhp != NULL) {
3572                                         free(nfhp, M_NFSFH);
3573                                         nfhp = NULL;
3574                                 }
3575                         } else {
3576                                 rderr = 0;
3577                                 nfsva.na_mntonfileno = 0xffffffff;
3578                                 error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
3579                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3580                                     NULL, NULL, &rderr, p, cred);
3581                                 if (error)
3582                                         goto nfsmout;
3583                         }
3584
3585                         if (bigenough) {
3586                             if (nd->nd_flag & ND_NFSV4) {
3587                                 if (rderr) {
3588                                     dp->d_fileno = 0;
3589                                 } else if (gotmnton) {
3590                                     if (nfsva.na_mntonfileno != 0xffffffff)
3591                                         dp->d_fileno = nfsva.na_mntonfileno;
3592                                     else
3593                                         dp->d_fileno = nfsva.na_fileid;
3594                                 } else if (nfsva.na_filesid[0] ==
3595                                     dnp->n_vattr.na_filesid[0] &&
3596                                     nfsva.na_filesid[1] ==
3597                                     dnp->n_vattr.na_filesid[1]) {
3598                                     dp->d_fileno = nfsva.na_fileid;
3599                                 } else {
3600                                     do {
3601                                         fakefileno--;
3602                                     } while (fakefileno ==
3603                                         nfsva.na_fileid);
3604                                     dp->d_fileno = fakefileno;
3605                                 }
3606                             } else {
3607                                 dp->d_fileno = fileno;
3608                             }
3609                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3610                                 ncookie.lval[0];
3611                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3612                                 ncookie.lval[1];
3613
3614                             if (nfhp != NULL) {
3615                                 if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
3616                                     dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
3617                                     VREF(vp);
3618                                     newvp = vp;
3619                                     unlocknewvp = 0;
3620                                     free(nfhp, M_NFSFH);
3621                                     np = dnp;
3622                                 } else if (isdotdot != 0) {
3623                                     /*
3624                                      * Skip doing a nfscl_nget() call for "..".
3625                                      * There's a race between acquiring the nfs
3626                                      * node here and lookups that look for the
3627                                      * directory being read (in the parent).
3628                                      * It would try to get a lock on ".." here,
3629                                      * owning the lock on the directory being
3630                                      * read. Lookup will hold the lock on ".."
3631                                      * and try to acquire the lock on the
3632                                      * directory being read.
3633                                      * If the directory is unlocked/relocked,
3634                                      * then there is a LOR with the buflock
3635                                      * vp is relocked.
3636                                      */
3637                                     free(nfhp, M_NFSFH);
3638                                 } else {
3639                                     error = nfscl_nget(vnode_mount(vp), vp,
3640                                       nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE);
3641                                     if (!error) {
3642                                         newvp = NFSTOV(np);
3643                                         unlocknewvp = 1;
3644                                     }
3645                                 }
3646                                 nfhp = NULL;
3647                                 if (newvp != NULLVP) {
3648                                     error = nfscl_loadattrcache(&newvp,
3649                                         &nfsva, NULL, NULL, 0, 0);
3650                                     if (error) {
3651                                         if (unlocknewvp)
3652                                             vput(newvp);
3653                                         else
3654                                             vrele(newvp);
3655                                         goto nfsmout;
3656                                     }
3657                                     dp->d_type =
3658                                         vtonfs_dtype(np->n_vattr.na_type);
3659                                     ndp->ni_vp = newvp;
3660                                     NFSCNHASH(cnp, HASHINIT);
3661                                     if (cnp->cn_namelen <= NCHNAMLEN &&
3662                                         (newvp->v_type != VDIR ||
3663                                          dctime.tv_sec != 0)) {
3664                                         cache_enter_time(ndp->ni_dvp,
3665                                             ndp->ni_vp, cnp,
3666                                             &nfsva.na_ctime,
3667                                             newvp->v_type != VDIR ? NULL :
3668                                             &dctime);
3669                                     }
3670                                     if (unlocknewvp)
3671                                         vput(newvp);
3672                                     else
3673                                         vrele(newvp);
3674                                     newvp = NULLVP;
3675                                 }
3676                             }
3677                         } else if (nfhp != NULL) {
3678                             free(nfhp, M_NFSFH);
3679                         }
3680                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3681                         more_dirs = fxdr_unsigned(int, *tl);
3682                 }
3683                 /*
3684                  * If at end of rpc data, get the eof boolean
3685                  */
3686                 if (!more_dirs) {
3687                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3688                         eof = fxdr_unsigned(int, *tl);
3689                         if (tryformoredirs)
3690                                 more_dirs = !eof;
3691                         if (nd->nd_flag & ND_NFSV4) {
3692                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3693                                     stuff);
3694                                 if (error)
3695                                         goto nfsmout;
3696                         }
3697                 }
3698                 mbuf_freem(nd->nd_mrep);
3699                 nd->nd_mrep = NULL;
3700         }
3701         /*
3702          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3703          * by increasing d_reclen for the last record.
3704          */
3705         if (blksiz > 0) {
3706                 left = DIRBLKSIZ - blksiz;
3707                 dp->d_reclen += left;
3708                 uio_iov_base_add(uiop, left);
3709                 uio_iov_len_add(uiop, -(left));
3710                 uio_uio_resid_add(uiop, -(left));
3711                 uiop->uio_offset += left;
3712         }
3713
3714         /*
3715          * If returning no data, assume end of file.
3716          * If not bigenough, return not end of file, since you aren't
3717          *    returning all the data
3718          * Otherwise, return the eof flag from the server.
3719          */
3720         if (eofp != NULL) {
3721                 if (tresid == uio_uio_resid(uiop))
3722                         *eofp = 1;
3723                 else if (!bigenough)
3724                         *eofp = 0;
3725                 else
3726                         *eofp = eof;
3727         }
3728
3729         /*
3730          * Add extra empty records to any remaining DIRBLKSIZ chunks.
3731          */
3732         while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
3733                 dp = (struct dirent *)uio_iov_base(uiop);
3734                 dp->d_type = DT_UNKNOWN;
3735                 dp->d_fileno = 0;
3736                 dp->d_namlen = 0;
3737                 dp->d_name[0] = '\0';
3738                 tl = (u_int32_t *)&dp->d_name[4];
3739                 *tl++ = cookie.lval[0];
3740                 *tl = cookie.lval[1];
3741                 dp->d_reclen = DIRBLKSIZ;
3742                 uio_iov_base_add(uiop, DIRBLKSIZ);
3743                 uio_iov_len_add(uiop, -(DIRBLKSIZ));
3744                 uio_uio_resid_add(uiop, -(DIRBLKSIZ));
3745                 uiop->uio_offset += DIRBLKSIZ;
3746         }
3747
3748 nfsmout:
3749         if (nd->nd_mrep != NULL)
3750                 mbuf_freem(nd->nd_mrep);
3751         return (error);
3752 }
3753 #endif  /* !APPLE */
3754
3755 /*
3756  * Nfs commit rpc
3757  */
3758 APPLESTATIC int
3759 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
3760     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
3761 {
3762         u_int32_t *tl;
3763         struct nfsrv_descript nfsd, *nd = &nfsd;
3764         nfsattrbit_t attrbits;
3765         int error;
3766         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3767         
3768         *attrflagp = 0;
3769         NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp);
3770         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3771         txdr_hyper(offset, tl);
3772         tl += 2;
3773         *tl = txdr_unsigned(cnt);
3774         if (nd->nd_flag & ND_NFSV4) {
3775                 /*
3776                  * And do a Getattr op.
3777                  */
3778                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3779                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3780                 NFSGETATTR_ATTRBIT(&attrbits);
3781                 (void) nfsrv_putattrbit(nd, &attrbits);
3782         }
3783         error = nfscl_request(nd, vp, p, cred, stuff);
3784         if (error)
3785                 return (error);
3786         error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff);
3787         if (!error && !nd->nd_repstat) {
3788                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
3789                 NFSLOCKMNT(nmp);
3790                 if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
3791                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
3792                         nd->nd_repstat = NFSERR_STALEWRITEVERF;
3793                 }
3794                 NFSUNLOCKMNT(nmp);
3795                 if (nd->nd_flag & ND_NFSV4)
3796                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3797         }
3798 nfsmout:
3799         if (!error && nd->nd_repstat)
3800                 error = nd->nd_repstat;
3801         mbuf_freem(nd->nd_mrep);
3802         return (error);
3803 }
3804
3805 /*
3806  * NFS byte range lock rpc.
3807  * (Mostly just calls one of the three lower level RPC routines.)
3808  */
3809 APPLESTATIC int
3810 nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
3811     int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
3812 {
3813         struct nfscllockowner *lp;
3814         struct nfsclclient *clp;
3815         struct nfsfh *nfhp;
3816         struct nfsrv_descript nfsd, *nd = &nfsd;
3817         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3818         u_int64_t off, len;
3819         off_t start, end;
3820         u_int32_t clidrev = 0;
3821         int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
3822         int callcnt, dorpc;
3823
3824         /*
3825          * Convert the flock structure into a start and end and do POSIX
3826          * bounds checking.
3827          */
3828         switch (fl->l_whence) {
3829         case SEEK_SET:
3830         case SEEK_CUR:
3831                 /*
3832                  * Caller is responsible for adding any necessary offset
3833                  * when SEEK_CUR is used.
3834                  */
3835                 start = fl->l_start;
3836                 off = fl->l_start;
3837                 break;
3838         case SEEK_END:
3839                 start = size + fl->l_start;
3840                 off = size + fl->l_start;
3841                 break;
3842         default:
3843                 return (EINVAL);
3844         }
3845         if (start < 0)
3846                 return (EINVAL);
3847         if (fl->l_len != 0) {
3848                 end = start + fl->l_len - 1;
3849                 if (end < start)
3850                         return (EINVAL);
3851         }
3852
3853         len = fl->l_len;
3854         if (len == 0)
3855                 len = NFS64BITSSET;
3856         retrycnt = 0;
3857         do {
3858             nd->nd_repstat = 0;
3859             if (op == F_GETLK) {
3860                 error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
3861                 if (error)
3862                         return (error);
3863                 error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
3864                 if (!error) {
3865                         clidrev = clp->nfsc_clientidrev;
3866                         error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
3867                             p, id, flags);
3868                 } else if (error == -1) {
3869                         error = 0;
3870                 }
3871                 nfscl_clientrelease(clp);
3872             } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
3873                 /*
3874                  * We must loop around for all lockowner cases.
3875                  */
3876                 callcnt = 0;
3877                 error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
3878                 if (error)
3879                         return (error);
3880                 do {
3881                     error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
3882                         clp, id, flags, &lp, &dorpc);
3883                     /*
3884                      * If it returns a NULL lp, we're done.
3885                      */
3886                     if (lp == NULL) {
3887                         if (callcnt == 0)
3888                             nfscl_clientrelease(clp);
3889                         else
3890                             nfscl_releasealllocks(clp, vp, p, id, flags);
3891                         return (error);
3892                     }
3893                     if (nmp->nm_clp != NULL)
3894                         clidrev = nmp->nm_clp->nfsc_clientidrev;
3895                     else
3896                         clidrev = 0;
3897                     /*
3898                      * If the server doesn't support Posix lock semantics,
3899                      * only allow locks on the entire file, since it won't
3900                      * handle overlapping byte ranges.
3901                      * There might still be a problem when a lock
3902                      * upgrade/downgrade (read<->write) occurs, since the
3903                      * server "might" expect an unlock first?
3904                      */
3905                     if (dorpc && (lp->nfsl_open->nfso_posixlock ||
3906                         (off == 0 && len == NFS64BITSSET))) {
3907                         /*
3908                          * Since the lock records will go away, we must
3909                          * wait for grace and delay here.
3910                          */
3911                         do {
3912                             error = nfsrpc_locku(nd, nmp, lp, off, len,
3913                                 NFSV4LOCKT_READ, cred, p, 0);
3914                             if ((nd->nd_repstat == NFSERR_GRACE ||
3915                                  nd->nd_repstat == NFSERR_DELAY) &&
3916                                 error == 0)
3917                                 (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
3918                                     "nfs_advlock");
3919                         } while ((nd->nd_repstat == NFSERR_GRACE ||
3920                             nd->nd_repstat == NFSERR_DELAY) && error == 0);
3921                     }
3922                     callcnt++;
3923                 } while (error == 0 && nd->nd_repstat == 0);
3924                 nfscl_releasealllocks(clp, vp, p, id, flags);
3925             } else if (op == F_SETLK) {
3926                 error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
3927                     NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
3928                 if (error || donelocally) {
3929                         return (error);
3930                 }
3931                 if (nmp->nm_clp != NULL)
3932                         clidrev = nmp->nm_clp->nfsc_clientidrev;
3933                 else
3934                         clidrev = 0;
3935                 nfhp = VTONFS(vp)->n_fhp;
3936                 if (!lp->nfsl_open->nfso_posixlock &&
3937                     (off != 0 || len != NFS64BITSSET)) {
3938                         error = EINVAL;
3939                 } else {
3940                         error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
3941                             nfhp->nfh_len, lp, newone, reclaim, off,
3942                             len, fl->l_type, cred, p, 0);
3943                 }
3944                 if (!error)
3945                         error = nd->nd_repstat;
3946                 nfscl_lockrelease(lp, error, newone);
3947             } else {
3948                 error = EINVAL;
3949             }
3950             if (!error)
3951                 error = nd->nd_repstat;
3952             if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
3953                 error == NFSERR_STALEDONTRECOVER ||
3954                 error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
3955                 error == NFSERR_BADSESSION) {
3956                 (void) nfs_catnap(PZERO, error, "nfs_advlock");
3957             } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
3958                 && clidrev != 0) {
3959                 expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
3960                 retrycnt++;
3961             }
3962         } while (error == NFSERR_GRACE ||
3963             error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
3964             error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
3965             error == NFSERR_BADSESSION ||
3966             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
3967              expireret == 0 && clidrev != 0 && retrycnt < 4));
3968         if (error && retrycnt >= 4)
3969                 error = EIO;
3970         return (error);
3971 }
3972
3973 /*
3974  * The lower level routine for the LockT case.
      *
      * Builds a LockT request for the byte range given by off/len.  The lock
      * owner is identified by the client ID of the MDS session plus an owner
      * name made of the string from nfscl_filllockowner() followed by the
      * file handle of vp.  The request is sent with nfscl_request().
      *
      * Results:
      *  - reply status 0: fl->l_type is set to F_UNLCK;
      *  - NFSERR_DENIED: nd->nd_repstat is reset to 0 and *fl is filled in
      *    from the conflicting lock in the reply (l_pid is always 0);
      *  - NFSERR_STALECLIENTID: recovery is initiated for clp;
      *  - any other reply status is left untouched in nd->nd_repstat.
      *
      * Returns the error from nfscl_request(), EBADRPC for an out-of-range
      * owner length in a denied reply, the error from nfsm_advance(), or 0.
      * NOTE(review): NFSM_DISSECT presumably bails out to nfsmout with error
      * set when the reply is too short; its definition is not visible here.
3975  */
3976 APPLESTATIC int
3977 nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
3978     struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
3979     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
3980 {
3981         u_int32_t *tl;
3982         int error, type, size;
3983         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
3984         struct nfsnode *np;
3985         struct nfsmount *nmp;
3986         struct nfsclsession *tsep;
3987
3988         nmp = VFSTONFS(vp->v_mount);
3989         NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp);
             /* 7 words: lock type, offset (2), length (2), client ID (2). */
3990         NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
             /* Anything other than F_RDLCK is tested as a write lock. */
3991         if (fl->l_type == F_RDLCK)
3992                 *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
3993         else
3994                 *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
3995         txdr_hyper(off, tl);
3996         tl += 2;
3997         txdr_hyper(len, tl);
3998         tl += 2;
3999         tsep = nfsmnt_mdssession(nmp);
4000         *tl++ = tsep->nfsess_clientid.lval[0];
4001         *tl = tsep->nfsess_clientid.lval[1];
             /* Owner name: lock owner string followed by the file handle. */
4002         nfscl_filllockowner(id, own, flags);
4003         np = VTONFS(vp);
4004         NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
4005             np->n_fhp->nfh_len);
4006         (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
4007         error = nfscl_request(nd, vp, p, cred, NULL);
             /* This early return does not go through the mbuf_freem() below. */
4008         if (error)
4009                 return (error);
4010         if (nd->nd_repstat == 0) {
4011                 fl->l_type = F_UNLCK;
4012         } else if (nd->nd_repstat == NFSERR_DENIED) {
                     /*
                      * Clear the status and report the conflicting lock
                      * through *fl instead.
                      */
4013                 nd->nd_repstat = 0;
4014                 fl->l_whence = SEEK_SET;
                     /*
                      * 8 words: offset (2), length (2), lock type, two
                      * words that are skipped, and the owner length.
                      */
4015                 NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4016                 fl->l_start = fxdr_hyper(tl);
4017                 tl += 2;
4018                 len = fxdr_hyper(tl);
4019                 tl += 2;
                     /* An all-ones length is reported as l_len == 0. */
4020                 if (len == NFS64BITSSET)
4021                         fl->l_len = 0;
4022                 else
4023                         fl->l_len = len;
4024                 type = fxdr_unsigned(int, *tl++);
4025                 if (type == NFSV4LOCKT_WRITE)
4026                         fl->l_type = F_WRLCK;
4027                 else
4028                         fl->l_type = F_RDLCK;
4029                 /*
4030                  * XXX For now, I have no idea what to do with the
4031                  * conflicting lock_owner, so I'll just set the pid == 0
4032                  * and skip over the lock_owner.
4033                  */
4034                 fl->l_pid = (pid_t)0;
4035                 tl += 2;
                     /* Validate the owner length before stepping over it. */
4036                 size = fxdr_unsigned(int, *tl);
4037                 if (size < 0 || size > NFSV4_OPAQUELIMIT)
4038                         error = EBADRPC;
4039                 if (!error)
4040                         error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4041         } else if (nd->nd_repstat == NFSERR_STALECLIENTID)
4042                 nfscl_initiate_recovery(clp);
4043 nfsmout:
4044         mbuf_freem(nd->nd_mrep);
4045         return (error);
4046 }
4047
4048 /*
4049  * Lower level function that performs the LockU RPC.
      *
      * Builds a LockU request against the file handle of the lock owner's
      * open, carrying the lock type, the lock owner's seqid and stateid, and
      * the off/len byte range, then sends it with newnfs_request().
      * ND_USEGSSNAME is set on the request when syscred is non-zero.
      *
      * NFSCL_INCRSEQID() is applied to lp->nfsl_seqid right after the
      * request, before the error check.  On reply status 0 the stateid in
      * the reply is stored in lp->nfsl_stateid; on NFSERR_STALESTATEID
      * recovery is initiated for the owning client.
      *
      * Returns the error from newnfs_request(), or 0 with the reply status
      * left in nd->nd_repstat.
      * NOTE(review): NFSM_DISSECT presumably bails out to nfsmout with error
      * set when the reply is too short; its definition is not visible here.
4050  */
4051 static int
4052 nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
4053     struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
4054     u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
4055 {
4056         u_int32_t *tl;
4057         int error;
4058
4059         nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
4060             lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0);
             /* Lock type, seqid, stateid, offset (2 words), length (2 words). */
4061         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
4062         *tl++ = txdr_unsigned(type);
4063         *tl = txdr_unsigned(lp->nfsl_seqid);
             /*
              * When nfstest_outofseq is non-zero, a random subset of the
              * requests is sent with seqid + 1 instead (by its name, a
              * testing knob).
              */
4064         if (nfstest_outofseq &&
4065             (arc4random() % nfstest_outofseq) == 0)
4066                 *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4067         tl++;
             /* The stateid's seqid word is sent as 0 if NFSHASNFSV4N(). */
4068         if (NFSHASNFSV4N(nmp))
4069                 *tl++ = 0;
4070         else
4071                 *tl++ = lp->nfsl_stateid.seqid;
4072         *tl++ = lp->nfsl_stateid.other[0];
4073         *tl++ = lp->nfsl_stateid.other[1];
4074         *tl++ = lp->nfsl_stateid.other[2];
4075         txdr_hyper(off, tl);
4076         tl += 2;
4077         txdr_hyper(len, tl);
4078         if (syscred)
4079                 nd->nd_flag |= ND_USEGSSNAME;
4080         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4081             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
             /* Done before the error check, so it runs on failure too. */
4082         NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
             /* This early return does not go through the mbuf_freem() below. */
4083         if (error)
4084                 return (error);
4085         if (nd->nd_repstat == 0) {
                     /* Remember the stateid the server handed back. */
4086                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4087                 lp->nfsl_stateid.seqid = *tl++;
4088                 lp->nfsl_stateid.other[0] = *tl++;
4089                 lp->nfsl_stateid.other[1] = *tl++;
4090                 lp->nfsl_stateid.other[2] = *tl;
4091         } else if (nd->nd_repstat == NFSERR_STALESTATEID)
4092                 nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4093 nfsmout:
4094         mbuf_freem(nd->nd_mrep);
4095         return (error);
4096 }
4097
4098 /*
4099  * The actual Lock RPC.
      *
      * Builds a Lock request against the file handle nfhp/fhlen for the byte
      * range off/len and sends it with newnfs_request().
      *
      * newone selects the form of the locker argument:
      *  - non-zero: the open owner's seqid, the open stateid, the lock
      *    owner's seqid, the client ID of the MDS session and an owner name
      *    made of lp->nfsl_owner followed by the file handle;
      *  - zero: the lock owner's existing stateid and seqid.
      *
      * After a request that did not fail in newnfs_request(),
      * NFSCL_INCRSEQID() is applied to the lock owner's seqid and, when
      * newone is set, to the open owner's seqid as well.  On reply status 0
      * the returned stateid is stored in lp->nfsl_stateid; on NFSERR_DENIED
      * the conflicting lock description is stepped over; on
      * NFSERR_STALESTATEID recovery is initiated for the owning client.
      *
      * Returns the error from newnfs_request(), EBADRPC for an out-of-range
      * owner length in a denied reply, the error from nfsm_advance(), or 0
      * with the reply status left in nd->nd_repstat.
      * NOTE(review): NFSM_DISSECT presumably bails out to nfsmout with error
      * set when the reply is too short; its definition is not visible here.
4100  */
4101 APPLESTATIC int
4102 nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
4103     u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
4104     int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
4105     NFSPROC_T *p, int syscred)
4106 {
4107         u_int32_t *tl;
4108         int error, size;
4109         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4110         struct nfsclsession *tsep;
4111
4112         nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
             /*
              * 7 words: lock type, reclaim, offset (2), length (2) and the
              * boolean that selects the form of what follows.
              */
4113         NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
             /* Anything other than F_RDLCK is requested as a write lock. */
4114         if (type == F_RDLCK)
4115                 *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4116         else
4117                 *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4118         *tl++ = txdr_unsigned(reclaim);
4119         txdr_hyper(off, tl);
4120         tl += 2;
4121         txdr_hyper(len, tl);
4122         tl += 2;
4123         if (newone) {
4124             *tl = newnfs_true;
                 /*
                  * Open owner seqid, open stateid, lock owner seqid and
                  * client ID.
                  */
4125             NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
4126                 2 * NFSX_UNSIGNED + NFSX_HYPER);
4127             *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
                 /* The stateid's seqid word is sent as 0 if NFSHASNFSV4N(). */
4128             if (NFSHASNFSV4N(nmp))
4129                 *tl++ = 0;
4130             else
4131                 *tl++ = lp->nfsl_open->nfso_stateid.seqid;
4132             *tl++ = lp->nfsl_open->nfso_stateid.other[0];
4133             *tl++ = lp->nfsl_open->nfso_stateid.other[1];
4134             *tl++ = lp->nfsl_open->nfso_stateid.other[2];
4135             *tl++ = txdr_unsigned(lp->nfsl_seqid);
4136             tsep = nfsmnt_mdssession(nmp);
4137             *tl++ = tsep->nfsess_clientid.lval[0];
4138             *tl = tsep->nfsess_clientid.lval[1];
                 /* Owner name: lock owner string followed by the file handle. */
4139             NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4140             NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4141             (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4142         } else {
4143             *tl = newnfs_false;
                 /* Existing lock owner: its stateid followed by its seqid. */
4144             NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
4145             if (NFSHASNFSV4N(nmp))
4146                 *tl++ = 0;
4147             else
4148                 *tl++ = lp->nfsl_stateid.seqid;
4149             *tl++ = lp->nfsl_stateid.other[0];
4150             *tl++ = lp->nfsl_stateid.other[1];
4151             *tl++ = lp->nfsl_stateid.other[2];
4152             *tl = txdr_unsigned(lp->nfsl_seqid);
                 /*
                  * When nfstest_outofseq is non-zero, a random subset of the
                  * requests is sent with seqid + 1 instead (by its name, a
                  * testing knob).
                  */
4153             if (nfstest_outofseq &&
4154                 (arc4random() % nfstest_outofseq) == 0)
4155                     *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4156         }
4157         if (syscred)
4158                 nd->nd_flag |= ND_USEGSSNAME;
4159         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4160             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
             /*
              * This early return skips both the seqid updates and the
              * mbuf_freem() below.
              */
4161         if (error)
4162                 return (error);
4163         if (newone)
4164             NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4165         NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4166         if (nd->nd_repstat == 0) {
                     /* Remember the stateid the server handed back. */
4167                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4168                 lp->nfsl_stateid.seqid = *tl++;
4169                 lp->nfsl_stateid.other[0] = *tl++;
4170                 lp->nfsl_stateid.other[1] = *tl++;
4171                 lp->nfsl_stateid.other[2] = *tl;
4172         } else if (nd->nd_repstat == NFSERR_DENIED) {
                     /*
                      * Only the owner length, the last of these 8 words, is
                      * looked at; the reply is then advanced past the owner.
                      */
4173                 NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4174                 size = fxdr_unsigned(int, *(tl + 7));
4175                 if (size < 0 || size > NFSV4_OPAQUELIMIT)
4176                         error = EBADRPC;
4177                 if (!error)
4178                         error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4179         } else if (nd->nd_repstat == NFSERR_STALESTATEID)
4180                 nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4181 nfsmout:
4182         mbuf_freem(nd->nd_mrep);
4183         return (error);
4184 }
4185
4186 /*
4187  * nfs statfs rpc
4188  * (always called with the vp for the mount point)
      *
      * For an NFSv4 mount a Getattr for the NFSSTATFS_GETATTRBIT attributes
      * is done and nfsv4_loadattr() fills in sbp, fsp and nap.  On success
      * the mount's nm_fsid is set from nap->na_filesid, NFSSETHASSETFSID()
      * is applied to the mount and *attrflagp is set to 1.
      *
      * Otherwise an FSStat RPC is done.  With ND_NFSV3 set on the request the
      * post-op attributes are loaded into nap/attrflagp first.  The reply is
      * then decoded into sbp using the 64 bit sf_tbytes..sf_invarsec fields
      * when NFSHASNFSV3() and the 32 bit sf_tsize..sf_bavail fields when
      * not.
      *
      * *attrflagp is cleared on entry.  Returns the error from
      * nfscl_request(), from attribute loading, or the status in the reply,
      * else 0.
      * NOTE(review): NFSM_DISSECT presumably bails out to nfsmout with error
      * set when the reply is too short; its definition is not visible here.
4189  */
4190 APPLESTATIC int
4191 nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4192     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4193     void *stuff)
4194 {
4195         u_int32_t *tl = NULL;
4196         struct nfsrv_descript nfsd, *nd = &nfsd;
4197         struct nfsmount *nmp;
4198         nfsattrbit_t attrbits;
4199         int error;
4200
4201         *attrflagp = 0;
4202         nmp = VFSTONFS(vnode_mount(vp));
4203         if (NFSHASNFSV4(nmp)) {
4204                 /*
4205                  * For V4, you actually do a getattr.
4206                  */
4207                 NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4208                 NFSSTATFS_GETATTRBIT(&attrbits);
4209                 (void) nfsrv_putattrbit(nd, &attrbits);
4210                 nd->nd_flag |= ND_USEGSSNAME;
4211                 error = nfscl_request(nd, vp, p, cred, stuff);
                     /* This early return skips the mbuf_freem() at nfsmout. */
4212                 if (error)
4213                         return (error);
4214                 if (nd->nd_repstat == 0) {
4215                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4216                             NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p,
4217                             cred);
4218                         if (!error) {
                                     /* Record the fsid on the mount. */
4219                                 nmp->nm_fsid[0] = nap->na_filesid[0];
4220                                 nmp->nm_fsid[1] = nap->na_filesid[1];
4221                                 NFSSETHASSETFSID(nmp);
4222                                 *attrflagp = 1;
4223                         }
4224                 } else {
4225                         error = nd->nd_repstat;
4226                 }
4227                 if (error)
4228                         goto nfsmout;
                     /*
                      * On success tl is still NULL here.  Neither decode
                      * branch below runs for a v4 mount, provided
                      * NFSHASNFSV3() is false for it (TODO confirm).
                      */
4229         } else {
4230                 NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp);
4231                 error = nfscl_request(nd, vp, p, cred, stuff);
                     /* This early return skips the mbuf_freem() at nfsmout. */
4232                 if (error)
4233                         return (error);
4234                 if (nd->nd_flag & ND_NFSV3) {
4235                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4236                         if (error)
4237                                 goto nfsmout;
4238                 }
4239                 if (nd->nd_repstat) {
4240                         error = nd->nd_repstat;
4241                         goto nfsmout;
4242                 }
                     /* Map the whole fixed-size result in one go. */
4243                 NFSM_DISSECT(tl, u_int32_t *,
4244                     NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4245         }
4246         if (NFSHASNFSV3(nmp)) {
                     /* Six 64 bit counters followed by one 32 bit word. */
4247                 sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4248                 sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4249                 sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4250                 sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4251                 sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4252                 sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4253                 sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4254         } else if (NFSHASNFSV4(nmp) == 0) {
                     /* Neither v3 nor v4: five 32 bit words. */
4255                 sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4256                 sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4257                 sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4258                 sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4259                 sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4260         }
4261 nfsmout:
4262         mbuf_freem(nd->nd_mrep);
4263         return (error);
4264 }
4265
4266 /*
4267  * nfs pathconf rpc
      *
      * For an NFSv4 mount a Getattr for the NFSPATHCONF_GETATTRBIT attributes
      * is done and nfsv4_loadattr() fills in pc and nap; *attrflagp is set to
      * 1 when that succeeds.
      *
      * Otherwise a Pathconf RPC is done: the post-op attributes are loaded
      * into nap/attrflagp and, if neither that nor the reply status reports
      * an error, the six 32 bit result words are decoded into pc.
      *
      * *attrflagp is cleared on entry.  Returns the error from
      * nfscl_request(), from attribute loading, or the status in the reply,
      * else 0.
      * NOTE(review): NFSM_DISSECT presumably bails out to nfsmout with error
      * set when the reply is too short; its definition is not visible here.
4268  */
4269 APPLESTATIC int
4270 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4271     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4272     void *stuff)
4273 {
4274         struct nfsrv_descript nfsd, *nd = &nfsd;
4275         struct nfsmount *nmp;
4276         u_int32_t *tl;
4277         nfsattrbit_t attrbits;
4278         int error;
4279
4280         *attrflagp = 0;
4281         nmp = VFSTONFS(vnode_mount(vp));
4282         if (NFSHASNFSV4(nmp)) {
4283                 /*
4284                  * For V4, you actually do a getattr.
4285                  */
4286                 NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4287                 NFSPATHCONF_GETATTRBIT(&attrbits);
4288                 (void) nfsrv_putattrbit(nd, &attrbits);
4289                 nd->nd_flag |= ND_USEGSSNAME;
4290                 error = nfscl_request(nd, vp, p, cred, stuff);
                     /* This early return skips the mbuf_freem() at nfsmout. */
4291                 if (error)
4292                         return (error);
4293                 if (nd->nd_repstat == 0) {
4294                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4295                             pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4296                             cred);
4297                         if (!error)
4298                                 *attrflagp = 1;
4299                 } else {
4300                         error = nd->nd_repstat;
4301                 }
4302         } else {
4303                 NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp);
4304                 error = nfscl_request(nd, vp, p, cred, stuff);
                     /* This early return skips the mbuf_freem() at nfsmout. */
4305                 if (error)
4306                         return (error);
4307                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
                     /* A post-op attribute error wins over the reply status. */
4308                 if (nd->nd_repstat && !error)
4309                         error = nd->nd_repstat;
4310                 if (!error) {
4311                         NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4312                         pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4313                         pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4314                         pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4315                         pc->pc_chownrestricted =
4316                             fxdr_unsigned(u_int32_t, *tl++);
4317                         pc->pc_caseinsensitive =
4318                             fxdr_unsigned(u_int32_t, *tl++);
4319                         pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4320                 }
4321         }
4322 nfsmout:
4323         mbuf_freem(nd->nd_mrep);
4324         return (error);
4325 }
4326
4327 /*
4328  * nfs version 3 fsinfo rpc call
      *
      * Does an FSInfo RPC for vp.  The post-op attributes are loaded into
      * nap/attrflagp and, if neither that nor the reply status reports an
      * error, the fixed-size result is decoded into fsp.
      *
      * *attrflagp is cleared on entry.  Returns the error from
      * nfscl_request(), from nfscl_postop_attr(), or the status in the
      * reply, else 0.
      * NOTE(review): NFSM_DISSECT presumably bails out to nfsmout with error
      * set when the reply is too short; its definition is not visible here.
4329  */
4330 APPLESTATIC int
4331 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4332     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
4333 {
4334         u_int32_t *tl;
4335         struct nfsrv_descript nfsd, *nd = &nfsd;
4336         int error;
4337
4338         *attrflagp = 0;
4339         NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp);
4340         error = nfscl_request(nd, vp, p, cred, stuff);
             /* This early return skips the mbuf_freem() at nfsmout. */
4341         if (error)
4342                 return (error);
4343         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
             /* A post-op attribute error wins over the reply status. */
4344         if (nd->nd_repstat && !error)
4345                 error = nd->nd_repstat;
4346         if (!error) {
4347                 NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
                     /* Seven 32 bit words come first. */
4348                 fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4349                 fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4350                 fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4351                 fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4352                 fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4353                 fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4354                 fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
                     /*
                      * Then two 2-word items (maximum file size, time
                      * delta) and a final properties word.
                      */
4355                 fsp->fs_maxfilesize = fxdr_hyper(tl);
4356                 tl += 2;
4357                 fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4358                 tl += 2;
4359                 fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4360         }
4361 nfsmout:
4362         mbuf_freem(nd->nd_mrep);
4363         return (error);
4364 }
4365
4366 /*
4367  * This function performs the Renew RPC.
4368  */
4369 APPLESTATIC int
4370 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4371     NFSPROC_T *p)
4372 {
4373         u_int32_t *tl;
4374         struct nfsrv_descript nfsd;
4375         struct nfsrv_descript *nd = &nfsd;
4376         struct nfsmount *nmp;
4377         int error;
4378         struct nfssockreq *nrp;
4379         struct nfsclsession *tsep;
4380
4381         nmp = clp->nfsc_nmp;
4382         if (nmp == NULL)
4383                 return (0);
4384         if (dsp == NULL)
4385                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
4386                     0);
4387         else
4388                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4389                     &dsp->nfsclds_sess, 0, 0);
4390         if (!NFSHASNFSV4N(nmp)) {
4391                 /* NFSv4.1 just uses a Sequence Op and not a Renew. */
4392                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4393                 tsep = nfsmnt_mdssession(nmp);
4394                 *tl++ = tsep->nfsess_clientid.lval[0];
4395                 *tl = tsep->nfsess_clientid.lval[1];
4396         }
4397         nrp = NULL;
4398         if (dsp != NULL)
4399                 nrp = dsp->nfsclds_sockp;
4400         if (nrp == NULL)
4401                 /* If NULL, use the MDS socket. */
4402                 nrp = &nmp->nm_sockreq;
4403         nd->nd_flag |= ND_USEGSSNAME;
4404         if (dsp == NULL)
4405                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4406                     NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4407         else
4408                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4409                     NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4410         if (error)
4411                 return (error);
4412         error = nd->nd_repstat;
4413         mbuf_freem(nd->nd_mrep);
4414         return (error);
4415 }
4416
4417 /*
4418  * This function performs the Releaselockowner RPC.
4419  */
4420 APPLESTATIC int
4421 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4422     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4423 {
4424         struct nfsrv_descript nfsd, *nd = &nfsd;
4425         u_int32_t *tl;
4426         int error;
4427         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4428         struct nfsclsession *tsep;
4429
4430         if (NFSHASNFSV4N(nmp)) {
4431                 /* For NFSv4.1, do a FreeStateID. */
4432                 nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4433                     NULL, 0, 0);
4434                 nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4435         } else {
4436                 nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4437                     NULL, 0, 0);
4438                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4439                 tsep = nfsmnt_mdssession(nmp);
4440                 *tl++ = tsep->nfsess_clientid.lval[0];
4441                 *tl = tsep->nfsess_clientid.lval[1];
4442                 NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4443                 NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4444                 (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4445         }
4446         nd->nd_flag |= ND_USEGSSNAME;
4447         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4448             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4449         if (error)
4450                 return (error);
4451         error = nd->nd_repstat;
4452         mbuf_freem(nd->nd_mrep);
4453         return (error);
4454 }
4455
4456 /*
4457  * This function performs the Compound to get the mount pt FH.
      *
      * The compound starts with PutRootFH, adds one Lookup per non-empty
      * '/'-separated component of dirpath and ends with GetFH.  The
      * operation count written through opcntp is patched once the number of
      * components is known.
      *
      * dirpath is modified in place while it is scanned: each '/' that ends
      * a component is overwritten with '\0' for the Lookup and put back
      * afterwards, so the string is unchanged on return.
      *
      * On reply status 0 the file handle is copied into nmp->nm_fh and its
      * length into nmp->nm_fhsize; a length <= 0 or > NFSX_FHMAX yields
      * NFSERR_BADXDR.  Returns the error from newnfs_request() or the
      * resulting nd_repstat.
      * NOTE(review): NFSM_DISSECT presumably bails out to nfsmout with error
      * set when the reply is too short; its definition is not visible here.
4458  */
4459 APPLESTATIC int
4460 nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
4461     NFSPROC_T *p)
4462 {
4463         u_int32_t *tl;
4464         struct nfsrv_descript nfsd;
4465         struct nfsrv_descript *nd = &nfsd;
4466         u_char *cp, *cp2;
4467         int error, cnt, len, setnil;
4468         u_int32_t *opcntp;
4469
4470         nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
4471             0);
4472         cp = dirpath;
4473         cnt = 0;
             /* One pass per path component; cnt counts the Lookups added. */
4474         do {
4475                 setnil = 0;
                     /* Skip any run of leading slashes. */
4476                 while (*cp == '/')
4477                         cp++;
                     /* Find the end of this component. */
4478                 cp2 = cp;
4479                 while (*cp2 != '\0' && *cp2 != '/')
4480                         cp2++;
                     /* Temporarily terminate the component for strlen(). */
4481                 if (*cp2 == '/') {
4482                         setnil = 1;
4483                         *cp2 = '\0';
4484                 }
                     /* Empty components (e.g. a trailing '/') add nothing. */
4485                 if (cp2 != cp) {
4486                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4487                         *tl = txdr_unsigned(NFSV4OP_LOOKUP);
4488                         nfsm_strtom(nd, cp, strlen(cp));
4489                         cnt++;
4490                 }
                     /* Put the '/' back and step past it. */
4491                 if (setnil)
4492                         *cp2++ = '/';
4493                 cp = cp2;
4494         } while (*cp != '\0');
             /* PutRootFH + cnt Lookups + GetFH, plus Sequence for NFSv4.1. */
4495         if (NFSHASNFSV4N(nmp))
4496                 /* Has a Sequence Op done by nfscl_reqstart(). */
4497                 *opcntp = txdr_unsigned(3 + cnt);
4498         else
4499                 *opcntp = txdr_unsigned(2 + cnt);
4500         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4501         *tl = txdr_unsigned(NFSV4OP_GETFH);
4502         nd->nd_flag |= ND_USEGSSNAME;
4503         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4504                 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
             /* This early return skips the mbuf_freem() at nfsmout. */
4505         if (error)
4506                 return (error);
4507         if (nd->nd_repstat == 0) {
                     /*
                      * Step over 2 words per Lookup plus 2 more; the word
                      * after those is read as the file handle length.
                      */
4508                 NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
4509                 tl += (2 + 2 * cnt);
4510                 if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
4511                         len > NFSX_FHMAX) {
4512                         nd->nd_repstat = NFSERR_BADXDR;
4513                 } else {
4514                         nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len);
4515                         if (nd->nd_repstat == 0)
4516                                 nmp->nm_fhsize = len;
4517                 }
4518         }
4519         error = nd->nd_repstat;
4520 nfsmout:
4521         mbuf_freem(nd->nd_mrep);
4522         return (error);
4523 }
4524
4525 /*
4526  * This function performs the Delegreturn RPC.
4527  */
4528 APPLESTATIC int
4529 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
4530     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
4531 {
4532         u_int32_t *tl;
4533         struct nfsrv_descript nfsd;
4534         struct nfsrv_descript *nd = &nfsd;
4535         int error;
4536
4537         nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
4538             dp->nfsdl_fhlen, NULL, NULL, 0, 0);
4539         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
4540         if (NFSHASNFSV4N(nmp))
4541                 *tl++ = 0;
4542         else
4543                 *tl++ = dp->nfsdl_stateid.seqid;
4544         *tl++ = dp->nfsdl_stateid.other[0];
4545         *tl++ = dp->nfsdl_stateid.other[1];
4546         *tl = dp->nfsdl_stateid.other[2];
4547         if (syscred)
4548                 nd->nd_flag |= ND_USEGSSNAME;
4549         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4550             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4551         if (error)
4552                 return (error);
4553         error = nd->nd_repstat;
4554         mbuf_freem(nd->nd_mrep);
4555         return (error);
4556 }
4557
4558 /*
4559  * nfs getacl call.
4560  */
4561 APPLESTATIC int
4562 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4563     struct acl *aclp, void *stuff)
4564 {
4565         struct nfsrv_descript nfsd, *nd = &nfsd;
4566         int error;
4567         nfsattrbit_t attrbits;
4568         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4569         
4570         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4571                 return (EOPNOTSUPP);
4572         NFSCL_REQSTART(nd, NFSPROC_GETACL, vp);
4573         NFSZERO_ATTRBIT(&attrbits);
4574         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4575         (void) nfsrv_putattrbit(nd, &attrbits);
4576         error = nfscl_request(nd, vp, p, cred, stuff);
4577         if (error)
4578                 return (error);
4579         if (!nd->nd_repstat)
4580                 error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
4581                     NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
4582         else
4583                 error = nd->nd_repstat;
4584         mbuf_freem(nd->nd_mrep);
4585         return (error);
4586 }
4587
4588 /*
4589  * nfs setacl call.
4590  */
4591 APPLESTATIC int
4592 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4593     struct acl *aclp, void *stuff)
4594 {
4595         int error;
4596         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4597         
4598         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4599                 return (EOPNOTSUPP);
4600         error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff);
4601         return (error);
4602 }
4603
4604 /*
4605  * nfs setacl call.
4606  */
4607 static int
4608 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4609     struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff)
4610 {
4611         struct nfsrv_descript nfsd, *nd = &nfsd;
4612         int error;
4613         nfsattrbit_t attrbits;
4614         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4615         
4616         if (!NFSHASNFSV4(nmp))
4617                 return (EOPNOTSUPP);
4618         NFSCL_REQSTART(nd, NFSPROC_SETACL, vp);
4619         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
4620         NFSZERO_ATTRBIT(&attrbits);
4621         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4622         (void) nfsv4_fillattr(nd, vnode_mount(vp), vp, aclp, NULL, NULL, 0,
4623             &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
4624         error = nfscl_request(nd, vp, p, cred, stuff);
4625         if (error)
4626                 return (error);
4627         /* Don't care about the pre/postop attributes */
4628         mbuf_freem(nd->nd_mrep);
4629         return (nd->nd_repstat);
4630 }
4631
4632 /*
4633  * Do the NFSv4.1 Exchange ID.
      *
      * Builds an ExchangeID request from nfsboottime.tv_sec, clp->nfsc_rev
      * and the client's id string, the caller's exchflags,
      * NFSV4EXCH_SP4NONE and one implementation id ("freebsd.org", the
      * kernel version string and a fixed timestamp), and sends it over nrp.
      *
      * On reply status 0 a struct nfsclds is allocated with room for the
      * server owner string and filled in with the client ID, sequence id,
      * expiry time and the MDS/DS flags taken from the reply.  If the reply
      * has NFSV4EXCH_USEPNFSMDS set and the mount has NFSHASPNFSOPT(),
      * NFSSTA_PNFS is also set in nmp->nm_state.  Once the server owner has
      * been copied, the structure's mutexes and session slots are set up
      * and it is handed back through *dspp; if the copy fails the structure
      * is freed instead.
      *
      * *dspp is set to NULL on entry and stays NULL on every failure.
      * Returns the error from newnfs_request(), NFSERR_BADXDR for an
      * out-of-range server owner length, or the resulting nd_repstat.
      * NOTE(review): NFSM_DISSECT presumably bails out to nfsmout with error
      * set when the reply is too short; its definition is not visible here.
4634  */
4635 int
4636 nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
4637     struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp,
4638     struct ucred *cred, NFSPROC_T *p)
4639 {
4640         uint32_t *tl, v41flags;
4641         struct nfsrv_descript nfsd;
4642         struct nfsrv_descript *nd = &nfsd;
4643         struct nfsclds *dsp;
4644         struct timespec verstime;
4645         int error, len;
4646
4647         *dspp = NULL;
4648         nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL, 0, 0);
4649         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4650         *tl++ = txdr_unsigned(nfsboottime.tv_sec);      /* Client owner */
4651         *tl = txdr_unsigned(clp->nfsc_rev);
4652         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
4653
             /* Exchange flags, state protection, implementation id count. */
4654         NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
4655         *tl++ = txdr_unsigned(exchflags);
4656         *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
4657
4658         /* Set the implementation id4 */
4659         *tl = txdr_unsigned(1);
4660         (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
4661         (void) nfsm_strtom(nd, version, strlen(version));
4662         NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
4663         verstime.tv_sec = 1293840000;           /* Jan 1, 2011 */
4664         verstime.tv_nsec = 0;
4665         txdr_nfsv4time(&verstime, tl);
4666         nd->nd_flag |= ND_USEGSSNAME;
4667         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4668             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4669         NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
4670             (int)nd->nd_repstat);
             /* This early return skips the mbuf_freem() at nfsmout. */
4671         if (error != 0)
4672                 return (error);
4673         if (nd->nd_repstat == 0) {
4674                 NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
                     /*
                      * The word at index 7 is read as the server owner
                      * length; it is checked before it sizes the allocation.
                      */
4675                 len = fxdr_unsigned(int, *(tl + 7));
4676                 if (len < 0 || len > NFSV4_OPAQUELIMIT) {
4677                         error = NFSERR_BADXDR;
4678                         goto nfsmout;
4679                 }
                     /* Zeroed, with len + 1 extra bytes for the owner. */
4680                 dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
4681                     M_WAITOK | M_ZERO);
4682                 dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
4683                 dsp->nfsclds_servownlen = len;
4684                 dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
4685                 dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
4686                 dsp->nfsclds_sess.nfsess_sequenceid =
4687                     fxdr_unsigned(uint32_t, *tl++);
4688                 v41flags = fxdr_unsigned(uint32_t, *tl);
                     /* pNFS is only turned on when the mount asked for it. */
4689                 if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
4690                     NFSHASPNFSOPT(nmp)) {
4691                         NFSCL_DEBUG(1, "set PNFS\n");
4692                         NFSLOCKMNT(nmp);
4693                         nmp->nm_state |= NFSSTA_PNFS;
4694                         NFSUNLOCKMNT(nmp);
4695                         dsp->nfsclds_flags |= NFSCLDS_MDS;
4696                 }
4697                 if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
4698                         dsp->nfsclds_flags |= NFSCLDS_DS;
4699                 if (len > 0)
4700                         nd->nd_repstat = nfsrv_mtostr(nd,
4701                             dsp->nfsclds_serverown, len);
                     /* Hand the structure to the caller or throw it away. */
4702                 if (nd->nd_repstat == 0) {
4703                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
4704                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
4705                             NULL, MTX_DEF);
4706                         nfscl_initsessionslots(&dsp->nfsclds_sess);
4707                         *dspp = dsp;
4708                 } else
4709                         free(dsp, M_NFSCLDS);
4710         }
4711         error = nd->nd_repstat;
4712 nfsmout:
4713         mbuf_freem(nd->nd_mrep);
4714         return (error);
4715 }
4716
4717 /*
4718  * Do the NFSv4.1 Create Session.
4719  */
4720 int
4721 nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
4722     struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
4723     NFSPROC_T *p)
4724 {
4725         uint32_t crflags, maxval, *tl;
4726         struct nfsrv_descript nfsd;
4727         struct nfsrv_descript *nd = &nfsd;
4728         int error, irdcnt;
4729
4730         /* Make sure nm_rsize, nm_wsize is set. */
4731         if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
4732                 nmp->nm_rsize = NFS_MAXBSIZE;
4733         if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
4734                 nmp->nm_wsize = NFS_MAXBSIZE;
4735         nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL, 0,
4736             0);
4737         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4738         *tl++ = sep->nfsess_clientid.lval[0];
4739         *tl++ = sep->nfsess_clientid.lval[1];
4740         *tl++ = txdr_unsigned(sequenceid);
4741         crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
4742         if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
4743                 crflags |= NFSV4CRSESS_CONNBACKCHAN;
4744         *tl = txdr_unsigned(crflags);
4745
4746         /* Fill in fore channel attributes. */
4747         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4748         *tl++ = 0;                              /* Header pad size */
4749         *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */
4750         *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */
4751         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
4752         *tl++ = txdr_unsigned(20);              /* Max operations */
4753         *tl++ = txdr_unsigned(64);              /* Max slots */
4754         *tl = 0;                                /* No rdma ird */
4755
4756         /* Fill in back channel attributes. */
4757         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4758         *tl++ = 0;                              /* Header pad size */
4759         *tl++ = txdr_unsigned(10000);           /* Max request size */
4760         *tl++ = txdr_unsigned(10000);           /* Max response size */
4761         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
4762         *tl++ = txdr_unsigned(4);               /* Max operations */
4763         *tl++ = txdr_unsigned(NFSV4_CBSLOTS);   /* Max slots */
4764         *tl = 0;                                /* No rdma ird */
4765
4766         NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
4767         *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */
4768
4769         /* Allow AUTH_SYS callbacks as uid, gid == 0. */
4770         *tl++ = txdr_unsigned(1);               /* Auth_sys only */
4771         *tl++ = txdr_unsigned(AUTH_SYS);        /* AUTH_SYS type */
4772         *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
4773         *tl++ = 0;                              /* Null machine name */
4774         *tl++ = 0;                              /* Uid == 0 */
4775         *tl++ = 0;                              /* Gid == 0 */
4776         *tl = 0;                                /* No additional gids */
4777         nd->nd_flag |= ND_USEGSSNAME;
4778         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
4779             NFS_VER4, NULL, 1, NULL, NULL);
4780         if (error != 0)
4781                 return (error);
4782         if (nd->nd_repstat == 0) {
4783                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
4784                     2 * NFSX_UNSIGNED);
4785                 bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
4786                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
4787                 sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
4788                 crflags = fxdr_unsigned(uint32_t, *tl);
4789                 if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
4790                         NFSLOCKMNT(nmp);
4791                         nmp->nm_state |= NFSSTA_SESSPERSIST;
4792                         NFSUNLOCKMNT(nmp);
4793                 }
4794
4795                 /* Get the fore channel slot count. */
4796                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4797                 tl++;                   /* Skip the header pad size. */
4798
4799                 /* Make sure nm_wsize is small enough. */
4800                 maxval = fxdr_unsigned(uint32_t, *tl++);
4801                 while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
4802                         if (nmp->nm_wsize > 8096)
4803                                 nmp->nm_wsize /= 2;
4804                         else
4805                                 break;
4806                 }
4807
4808                 /* Make sure nm_rsize is small enough. */
4809                 maxval = fxdr_unsigned(uint32_t, *tl++);
4810                 while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
4811                         if (nmp->nm_rsize > 8096)
4812                                 nmp->nm_rsize /= 2;
4813                         else
4814                                 break;
4815                 }
4816
4817                 sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
4818                 tl++;
4819                 sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
4820                 NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
4821                 irdcnt = fxdr_unsigned(int, *tl);
4822                 if (irdcnt > 0)
4823                         NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
4824
4825                 /* and the back channel slot count. */
4826                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4827                 tl += 5;
4828                 sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
4829                 NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
4830         }
4831         error = nd->nd_repstat;
4832 nfsmout:
4833         mbuf_freem(nd->nd_mrep);
4834         return (error);
4835 }
4836
4837 /*
4838  * Do the NFSv4.1 Destroy Session.
4839  */
4840 int
4841 nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp,
4842     struct ucred *cred, NFSPROC_T *p)
4843 {
4844         uint32_t *tl;
4845         struct nfsrv_descript nfsd;
4846         struct nfsrv_descript *nd = &nfsd;
4847         int error;
4848         struct nfsclsession *tsep;
4849
4850         nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL, 0,
4851             0);
4852         NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
4853         tsep = nfsmnt_mdssession(nmp);
4854         bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID);
4855         nd->nd_flag |= ND_USEGSSNAME;
4856         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4857             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4858         if (error != 0)
4859                 return (error);
4860         error = nd->nd_repstat;
4861         mbuf_freem(nd->nd_mrep);
4862         return (error);
4863 }
4864
4865 /*
4866  * Do the NFSv4.1 Destroy Client.
4867  */
4868 int
4869 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
4870     struct ucred *cred, NFSPROC_T *p)
4871 {
4872         uint32_t *tl;
4873         struct nfsrv_descript nfsd;
4874         struct nfsrv_descript *nd = &nfsd;
4875         int error;
4876         struct nfsclsession *tsep;
4877
4878         nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0,
4879             0);
4880         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4881         tsep = nfsmnt_mdssession(nmp);
4882         *tl++ = tsep->nfsess_clientid.lval[0];
4883         *tl = tsep->nfsess_clientid.lval[1];
4884         nd->nd_flag |= ND_USEGSSNAME;
4885         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4886             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4887         if (error != 0)
4888                 return (error);
4889         error = nd->nd_repstat;
4890         mbuf_freem(nd->nd_mrep);
4891         return (error);
4892 }
4893
4894 /*
4895  * Do the NFSv4.1 LayoutGet.
4896  */
4897 static int
4898 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
4899     uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
4900     int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
4901     struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p,
4902     void *stuff)
4903 {
4904         struct nfsrv_descript nfsd, *nd = &nfsd;
4905         int error;
4906
4907         nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
4908             0);
4909         nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
4910             layouttype, layoutlen, 0);
4911         nd->nd_flag |= ND_USEGSSNAME;
4912         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4913             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4914         NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
4915         if (error != 0)
4916                 return (error);
4917         if (nd->nd_repstat == 0)
4918                 error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp);
4919         if (error == 0 && nd->nd_repstat != 0)
4920                 error = nd->nd_repstat;
4921         mbuf_freem(nd->nd_mrep);
4922         return (error);
4923 }
4924
4925 /*
4926  * Do the NFSv4.1 Get Device Info.
4927  */
4928 int
4929 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
4930     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
4931     NFSPROC_T *p)
4932 {
4933         uint32_t cnt, *tl, vers, minorvers;
4934         struct nfsrv_descript nfsd;
4935         struct nfsrv_descript *nd = &nfsd;
4936         struct sockaddr_in sin, ssin;
4937         struct sockaddr_in6 sin6, ssin6;
4938         struct nfsclds *dsp = NULL, **dspp, **gotdspp;
4939         struct nfscldevinfo *ndi;
4940         int addrcnt = 0, bitcnt, error, gotvers, i, isudp, j, stripecnt;
4941         uint8_t stripeindex;
4942         sa_family_t af, safilled;
4943
4944         *ndip = NULL;
4945         ndi = NULL;
4946         gotdspp = NULL;
4947         nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0,
4948             0);
4949         NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
4950         NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
4951         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
4952         *tl++ = txdr_unsigned(layouttype);
4953         *tl++ = txdr_unsigned(100000);
4954         if (notifybitsp != NULL && *notifybitsp != 0) {
4955                 *tl = txdr_unsigned(1);         /* One word of bits. */
4956                 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
4957                 *tl = txdr_unsigned(*notifybitsp);
4958         } else
4959                 *tl = txdr_unsigned(0);
4960         nd->nd_flag |= ND_USEGSSNAME;
4961         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4962             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4963         if (error != 0)
4964                 return (error);
4965         if (nd->nd_repstat == 0) {
4966                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4967                 if (layouttype != fxdr_unsigned(int, *tl))
4968                         printf("EEK! devinfo layout type not same!\n");
4969                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
4970                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
4971                         stripecnt = fxdr_unsigned(int, *tl);
4972                         NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
4973                         if (stripecnt < 1 || stripecnt > 4096) {
4974                                 printf("pNFS File layout devinfo stripecnt %d:"
4975                                     " out of range\n", stripecnt);
4976                                 error = NFSERR_BADXDR;
4977                                 goto nfsmout;
4978                         }
4979                         NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
4980                             NFSX_UNSIGNED);
4981                         addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
4982                         NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
4983                         if (addrcnt < 1 || addrcnt > 128) {
4984                                 printf("NFS devinfo addrcnt %d: out of range\n",
4985                                     addrcnt);
4986                                 error = NFSERR_BADXDR;
4987                                 goto nfsmout;
4988                         }
4989         
4990                         /*
4991                          * Now we know how many stripe indices and addresses, so
4992                          * we can allocate the structure the correct size.
4993                          */
4994                         i = (stripecnt * sizeof(uint8_t)) /
4995                             sizeof(struct nfsclds *) + 1;
4996                         NFSCL_DEBUG(4, "stripeindices=%d\n", i);
4997                         ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
4998                             sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
4999                             M_ZERO);
5000                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5001                             NFSX_V4DEVICEID);
5002                         ndi->nfsdi_refcnt = 0;
5003                         ndi->nfsdi_flags = NFSDI_FILELAYOUT;
5004                         ndi->nfsdi_stripecnt = stripecnt;
5005                         ndi->nfsdi_addrcnt = addrcnt;
5006                         /* Fill in the stripe indices. */
5007                         for (i = 0; i < stripecnt; i++) {
5008                                 stripeindex = fxdr_unsigned(uint8_t, *tl++);
5009                                 NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
5010                                 if (stripeindex >= addrcnt) {
5011                                         printf("pNFS File Layout devinfo"
5012                                             " stripeindex %d: too big\n",
5013                                             (int)stripeindex);
5014                                         error = NFSERR_BADXDR;
5015                                         goto nfsmout;
5016                                 }
5017                                 nfsfldi_setstripeindex(ndi, i, stripeindex);
5018                         }
5019                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
5020                         /* For Flex File, we only get one address list. */
5021                         ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
5022                             M_NFSDEVINFO, M_WAITOK | M_ZERO);
5023                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5024                             NFSX_V4DEVICEID);
5025                         ndi->nfsdi_refcnt = 0;
5026                         ndi->nfsdi_flags = NFSDI_FLEXFILE;
5027                         addrcnt = ndi->nfsdi_addrcnt = 1;
5028                 }
5029
5030                 /* Now, dissect the server address(es). */
5031                 safilled = AF_UNSPEC;
5032                 for (i = 0; i < addrcnt; i++) {
5033                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5034                         cnt = fxdr_unsigned(uint32_t, *tl);
5035                         if (cnt == 0) {
5036                                 printf("NFS devinfo 0 len addrlist\n");
5037                                 error = NFSERR_BADXDR;
5038                                 goto nfsmout;
5039                         }
5040                         dspp = nfsfldi_addr(ndi, i);
5041                         safilled = AF_UNSPEC;
5042                         for (j = 0; j < cnt; j++) {
5043                                 error = nfsv4_getipaddr(nd, &sin, &sin6, &af,
5044                                     &isudp);
5045                                 if (error != 0 && error != EPERM) {
5046                                         error = NFSERR_BADXDR;
5047                                         goto nfsmout;
5048                                 }
5049                                 if (error == 0 && isudp == 0) {
5050                                         /*
5051                                          * The priority is:
5052                                          * - Same address family.
5053                                          * Save the address and dspp, so that
5054                                          * the connection can be done after
5055                                          * parsing is complete.
5056                                          */
5057                                         if (safilled == AF_UNSPEC ||
5058                                             (af == nmp->nm_nam->sa_family &&
5059                                              safilled != nmp->nm_nam->sa_family)
5060                                            ) {
5061                                                 if (af == AF_INET)
5062                                                         ssin = sin;
5063                                                 else
5064                                                         ssin6 = sin6;
5065                                                 safilled = af;
5066                                                 gotdspp = dspp;
5067                                         }
5068                                 }
5069                         }
5070                 }
5071
5072                 gotvers = NFS_VER4;     /* Always NFSv4 for File Layout. */
5073                 /* For Flex File, we will take one of the versions to use. */
5074                 if (layouttype == NFSLAYOUT_FLEXFILE) {
5075                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5076                         j = fxdr_unsigned(int, *tl);
5077                         if (j < 1 || j > NFSDEV_MAXVERS) {
5078                                 printf("pNFS: too many versions\n");
5079                                 error = NFSERR_BADXDR;
5080                                 goto nfsmout;
5081                         }
5082                         gotvers = 0;
5083                         for (i = 0; i < j; i++) {
5084                                 NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
5085                                 vers = fxdr_unsigned(uint32_t, *tl++);
5086                                 minorvers = fxdr_unsigned(uint32_t, *tl++);
5087                                 if ((vers == NFS_VER4 && minorvers ==
5088                                     NFSV41_MINORVERSION) || (vers == NFS_VER3 &&
5089                                     gotvers == 0)) {
5090                                         gotvers = vers;
5091                                         /* We'll take this one. */
5092                                         ndi->nfsdi_versindex = i;
5093                                         ndi->nfsdi_vers = vers;
5094                                         ndi->nfsdi_minorvers = minorvers;
5095                                         ndi->nfsdi_rsize = fxdr_unsigned(
5096                                             uint32_t, *tl++);
5097                                         ndi->nfsdi_wsize = fxdr_unsigned(
5098                                             uint32_t, *tl++);
5099                                         if (*tl == newnfs_true)
5100                                                 ndi->nfsdi_flags |=
5101                                                     NFSDI_TIGHTCOUPLED;
5102                                         else
5103                                                 ndi->nfsdi_flags &=
5104                                                     ~NFSDI_TIGHTCOUPLED;
5105                                 }
5106                         }
5107                         if (gotvers == 0) {
5108                                 printf("pNFS: no NFSv3 or NFSv4.1\n");
5109                                 error = NFSERR_BADXDR;
5110                                 goto nfsmout;
5111                         }
5112                 }
5113
5114                 /* And the notify bits. */
5115                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5116                 bitcnt = fxdr_unsigned(int, *tl);
5117                 if (bitcnt > 0) {
5118                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5119                         if (notifybitsp != NULL)
5120                                 *notifybitsp =
5121                                     fxdr_unsigned(uint32_t, *tl);
5122                 }
5123                 if (safilled != AF_UNSPEC) {
5124                         KASSERT(ndi != NULL, ("ndi is NULL"));
5125                         *ndip = ndi;
5126                 } else
5127                         error = EPERM;
5128                 if (error == 0) {
5129                         /*
5130                          * Now we can do a TCP connection for the correct
5131                          * NFS version and IP address.
5132                          */
5133                         error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled,
5134                             gotvers, &dsp, p);
5135                 }
5136                 if (error == 0) {
5137                         KASSERT(gotdspp != NULL, ("gotdspp is NULL"));
5138                         *gotdspp = dsp;
5139                 }
5140         }
5141         if (nd->nd_repstat != 0 && error == 0)
5142                 error = nd->nd_repstat;
5143 nfsmout:
5144         if (error != 0 && ndi != NULL)
5145                 nfscl_freedevinfo(ndi);
5146         mbuf_freem(nd->nd_mrep);
5147         return (error);
5148 }
5149
5150 /*
5151  * Do the NFSv4.1 LayoutCommit.
5152  */
5153 int
5154 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5155     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5156     int layouttype, struct ucred *cred, NFSPROC_T *p, void *stuff)
5157 {
5158         uint32_t *tl;
5159         struct nfsrv_descript nfsd, *nd = &nfsd;
5160         int error;
5161
5162         nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL,
5163             0, 0);
5164         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5165             NFSX_STATEID);
5166         txdr_hyper(off, tl);
5167         tl += 2;
5168         txdr_hyper(len, tl);
5169         tl += 2;
5170         if (reclaim != 0)
5171                 *tl++ = newnfs_true;
5172         else
5173                 *tl++ = newnfs_false;
5174         *tl++ = txdr_unsigned(stateidp->seqid);
5175         *tl++ = stateidp->other[0];
5176         *tl++ = stateidp->other[1];
5177         *tl++ = stateidp->other[2];
5178         *tl++ = newnfs_true;
5179         if (lastbyte < off)
5180                 lastbyte = off;
5181         else if (lastbyte >= (off + len))
5182                 lastbyte = off + len - 1;
5183         txdr_hyper(lastbyte, tl);
5184         tl += 2;
5185         *tl++ = newnfs_false;
5186         *tl++ = txdr_unsigned(layouttype);
5187         /* All supported layouts are 0 length. */
5188         *tl = txdr_unsigned(0);
5189         nd->nd_flag |= ND_USEGSSNAME;
5190         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5191             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5192         if (error != 0)
5193                 return (error);
5194         error = nd->nd_repstat;
5195         mbuf_freem(nd->nd_mrep);
5196         return (error);
5197 }
5198
5199 /*
5200  * Do the NFSv4.1 LayoutReturn.
5201  */
5202 int
5203 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5204     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5205     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5206     uint32_t stat, uint32_t op, char *devid)
5207 {
5208         uint32_t *tl;
5209         struct nfsrv_descript nfsd, *nd = &nfsd;
5210         uint64_t tu64;
5211         int error;
5212
5213         nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL,
5214             0, 0);
5215         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5216         if (reclaim != 0)
5217                 *tl++ = newnfs_true;
5218         else
5219                 *tl++ = newnfs_false;
5220         *tl++ = txdr_unsigned(layouttype);
5221         *tl++ = txdr_unsigned(iomode);
5222         *tl = txdr_unsigned(layoutreturn);
5223         if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5224                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5225                     NFSX_UNSIGNED);
5226                 txdr_hyper(offset, tl);
5227                 tl += 2;
5228                 txdr_hyper(len, tl);
5229                 tl += 2;
5230                 NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5231                 *tl++ = txdr_unsigned(stateidp->seqid);
5232                 *tl++ = stateidp->other[0];
5233                 *tl++ = stateidp->other[1];
5234                 *tl++ = stateidp->other[2];
5235                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
5236                         *tl = txdr_unsigned(0);
5237                 else if (layouttype == NFSLAYOUT_FLEXFILE) {
5238                         if (stat != 0) {
5239                                 *tl = txdr_unsigned(2 * NFSX_HYPER +
5240                                     NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5241                                     NFSX_UNSIGNED);
5242                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER +
5243                                     NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5244                                     NFSX_UNSIGNED);
5245                                 *tl++ = txdr_unsigned(1);       /* One error. */
5246                                 tu64 = 0;                       /* Offset. */
5247                                 txdr_hyper(tu64, tl); tl += 2;
5248                                 tu64 = UINT64_MAX;              /* Length. */
5249                                 txdr_hyper(tu64, tl); tl += 2;
5250                                 NFSBCOPY(stateidp, tl, NFSX_STATEID);
5251                                 tl += (NFSX_STATEID / NFSX_UNSIGNED);
5252                                 *tl++ = txdr_unsigned(1);       /* One error. */
5253                                 NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5254                                 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5255                                 *tl++ = txdr_unsigned(stat);
5256                                 *tl++ = txdr_unsigned(op);
5257                         } else {
5258                                 *tl = txdr_unsigned(2 * NFSX_UNSIGNED);
5259                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5260                                 /* No ioerrs. */
5261                                 *tl++ = 0;
5262                         }
5263                         *tl = 0;        /* No stats yet. */
5264                 }
5265         }
5266         nd->nd_flag |= ND_USEGSSNAME;
5267         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5268             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5269         if (error != 0)
5270                 return (error);
5271         if (nd->nd_repstat == 0) {
5272                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5273                 if (*tl != 0) {
5274                         NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5275                         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5276                         stateidp->other[0] = *tl++;
5277                         stateidp->other[1] = *tl++;
5278                         stateidp->other[2] = *tl;
5279                 }
5280         } else
5281                 error = nd->nd_repstat;
5282 nfsmout:
5283         mbuf_freem(nd->nd_mrep);
5284         return (error);
5285 }
5286
5287 /*
5288  * Acquire a layout and devinfo, if possible. The caller must have acquired
5289  * a reference count on the nfsclclient structure before calling this.
5290  * Return the layout in lypp with a reference count on it, if successful.
5291  */
5292 static int
5293 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5294     int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
5295     struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5296 {
5297         struct nfscllayout *lyp;
5298         struct nfsclflayout *flp;
5299         struct nfsclflayouthead flh;
5300         int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
5301         nfsv4stateid_t stateid;
5302         struct nfsclsession *tsep;
5303
5304         *lypp = NULL;
5305         if (NFSHASFLEXFILE(nmp))
5306                 layouttype = NFSLAYOUT_FLEXFILE;
5307         else
5308                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5309         /*
5310          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5311          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5312          * flp == NULL.
5313          */
5314         lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5315             off, &flp, &recalled);
5316         islocked = 0;
5317         if (lyp == NULL || flp == NULL) {
5318                 if (recalled != 0)
5319                         return (EIO);
5320                 LIST_INIT(&flh);
5321                 tsep = nfsmnt_mdssession(nmp);
5322                 layoutlen = tsep->nfsess_maxcache -
5323                     (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5324                 if (lyp == NULL) {
5325                         stateid.seqid = 0;
5326                         stateid.other[0] = stateidp->other[0];
5327                         stateid.other[1] = stateidp->other[1];
5328                         stateid.other[2] = stateidp->other[2];
5329                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5330                             nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5331                             (uint64_t)0, layouttype, layoutlen, &stateid,
5332                             &retonclose, &flh, cred, p, NULL);
5333                 } else {
5334                         islocked = 1;
5335                         stateid.seqid = lyp->nfsly_stateid.seqid;
5336                         stateid.other[0] = lyp->nfsly_stateid.other[0];
5337                         stateid.other[1] = lyp->nfsly_stateid.other[1];
5338                         stateid.other[2] = lyp->nfsly_stateid.other[2];
5339                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5340                             nfhp->nfh_len, iomode, off, UINT64_MAX,
5341                             (uint64_t)0, layouttype, layoutlen, &stateid,
5342                             &retonclose, &flh, cred, p, NULL);
5343                 }
5344                 error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5345                     nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5346                     &flh, layouttype, error, NULL, cred, p);
5347                 if (error == 0)
5348                         *lypp = lyp;
5349                 else if (islocked != 0)
5350                         nfscl_rellayout(lyp, 1);
5351         } else
5352                 *lypp = lyp;
5353         return (error);
5354 }
5355
5356 /*
5357  * Do a TCP connection plus exchange id and create session.
5358  * If successful, a "struct nfsclds" is linked into the list for the
5359  * mount point and a pointer to it is returned.
5360  */
5361 static int
5362 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
5363     struct sockaddr_in6 *sin6, sa_family_t af, int vers, struct nfsclds **dspp,
5364     NFSPROC_T *p)
5365 {
5366         struct sockaddr_in *msad, *sad;
5367         struct sockaddr_in6 *msad6, *sad6;
5368         struct nfsclclient *clp;
5369         struct nfssockreq *nrp;
5370         struct nfsclds *dsp, *tdsp;
5371         int error;
5372         enum nfsclds_state retv;
5373         uint32_t sequenceid;
5374
5375         KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5376             ("nfsrpc_fillsa: NULL nr_cred"));
5377         NFSLOCKCLSTATE();
5378         clp = nmp->nm_clp;
5379         NFSUNLOCKCLSTATE();
5380         if (clp == NULL)
5381                 return (EPERM);
5382         if (af == AF_INET) {
5383                 NFSLOCKMNT(nmp);
5384                 /*
5385                  * Check to see if we already have a session for this
5386                  * address that is usable for a DS.
5387                  * Note that the MDS's address is in a different place
5388                  * than the sessions already acquired for DS's.
5389                  */
5390                 msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5391                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
5392                 while (tdsp != NULL) {
5393                         if (msad != NULL && msad->sin_family == AF_INET &&
5394                             sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
5395                             sin->sin_port == msad->sin_port &&
5396                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5397                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
5398                                 *dspp = tdsp;
5399                                 NFSUNLOCKMNT(nmp);
5400                                 NFSCL_DEBUG(4, "fnd same addr\n");
5401                                 return (0);
5402                         }
5403                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5404                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5405                                 msad = (struct sockaddr_in *)
5406                                     tdsp->nfsclds_sockp->nr_nam;
5407                         else
5408                                 msad = NULL;
5409                 }
5410                 NFSUNLOCKMNT(nmp);
5411
5412                 /* No IP address match, so look for new/trunked one. */
5413                 sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
5414                 sad->sin_len = sizeof(*sad);
5415                 sad->sin_family = AF_INET;
5416                 sad->sin_port = sin->sin_port;
5417                 sad->sin_addr.s_addr = sin->sin_addr.s_addr;
5418                 nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5419                 nrp->nr_nam = (struct sockaddr *)sad;
5420         } else if (af == AF_INET6) {
5421                 NFSLOCKMNT(nmp);
5422                 /*
5423                  * Check to see if we already have a session for this
5424                  * address that is usable for a DS.
5425                  * Note that the MDS's address is in a different place
5426                  * than the sessions already acquired for DS's.
5427                  */
5428                 msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
5429                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
5430                 while (tdsp != NULL) {
5431                         if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
5432                             IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
5433                             &msad6->sin6_addr) &&
5434                             sin6->sin6_port == msad6->sin6_port &&
5435                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5436                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
5437                                 *dspp = tdsp;
5438                                 NFSUNLOCKMNT(nmp);
5439                                 return (0);
5440                         }
5441                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5442                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5443                                 msad6 = (struct sockaddr_in6 *)
5444                                     tdsp->nfsclds_sockp->nr_nam;
5445                         else
5446                                 msad6 = NULL;
5447                 }
5448                 NFSUNLOCKMNT(nmp);
5449
5450                 /* No IP address match, so look for new/trunked one. */
5451                 sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
5452                 sad6->sin6_len = sizeof(*sad6);
5453                 sad6->sin6_family = AF_INET6;
5454                 sad6->sin6_port = sin6->sin6_port;
5455                 NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
5456                     sizeof(struct in6_addr));
5457                 nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5458                 nrp->nr_nam = (struct sockaddr *)sad6;
5459         } else
5460                 return (EPERM);
5461
5462         nrp->nr_sotype = SOCK_STREAM;
5463         mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
5464         nrp->nr_prog = NFS_PROG;
5465         nrp->nr_vers = vers;
5466
5467         /*
5468          * Use the credentials that were used for the mount, which are
5469          * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
5470          * Ref. counting the credentials with crhold() is probably not
5471          * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
5472          * unmount, but I did it anyhow.
5473          */
5474         nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
5475         error = newnfs_connect(nmp, nrp, NULL, p, 0);
5476         NFSCL_DEBUG(3, "DS connect=%d\n", error);
5477
5478         dsp = NULL;
5479         /* Now, do the exchangeid and create session. */
5480         if (error == 0) {
5481                 if (vers == NFS_VER4) {
5482                         error = nfsrpc_exchangeid(nmp, clp, nrp,
5483                             NFSV4EXCH_USEPNFSDS, &dsp, nrp->nr_cred, p);
5484                         NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
5485                         if (error != 0)
5486                                 newnfs_disconnect(nrp);
5487                 } else {
5488                         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
5489                             M_WAITOK | M_ZERO);
5490                         dsp->nfsclds_flags |= NFSCLDS_DS;
5491                         dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
5492                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5493                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5494                             NULL, MTX_DEF);
5495                 }
5496         }
5497         if (error == 0) {
5498                 dsp->nfsclds_sockp = nrp;
5499                 if (vers == NFS_VER4) {
5500                         NFSLOCKMNT(nmp);
5501                         retv = nfscl_getsameserver(nmp, dsp, &tdsp);
5502                         NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
5503                         if (retv == NFSDSP_USETHISSESSION) {
5504                                 NFSUNLOCKMNT(nmp);
5505                                 /*
5506                                  * If there is already a session for this
5507                                  * server, use it.
5508                                  */
5509                                 (void)newnfs_disconnect(nrp);
5510                                 nfscl_freenfsclds(dsp);
5511                                 *dspp = tdsp;
5512                                 return (0);
5513                         }
5514                         if (retv == NFSDSP_SEQTHISSESSION)
5515                                 sequenceid =
5516                                     tdsp->nfsclds_sess.nfsess_sequenceid;
5517                         else
5518                                 sequenceid =
5519                                     dsp->nfsclds_sess.nfsess_sequenceid;
5520                         NFSUNLOCKMNT(nmp);
5521                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
5522                             nrp, sequenceid, 0, nrp->nr_cred, p);
5523                         NFSCL_DEBUG(3, "DS createsess=%d\n", error);
5524                 }
5525         } else {
5526                 NFSFREECRED(nrp->nr_cred);
5527                 NFSFREEMUTEX(&nrp->nr_mtx);
5528                 free(nrp->nr_nam, M_SONAME);
5529                 free(nrp, M_NFSSOCKREQ);
5530         }
5531         if (error == 0) {
5532                 NFSCL_DEBUG(3, "add DS session\n");
5533                 /*
5534                  * Put it at the end of the list. That way the list
5535                  * is ordered by when the entry was added. This matters
5536                  * since the one done first is the one that should be
5537                  * used for sequencid'ing any subsequent create sessions.
5538                  */
5539                 NFSLOCKMNT(nmp);
5540                 TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
5541                 NFSUNLOCKMNT(nmp);
5542                 *dspp = dsp;
5543         } else if (dsp != NULL) {
5544                 newnfs_disconnect(nrp);
5545                 nfscl_freenfsclds(dsp);
5546         }
5547         return (error);
5548 }
5549
5550 /*
5551  * Do the NFSv4.1 Reclaim Complete.
5552  */
5553 int
5554 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
5555 {
5556         uint32_t *tl;
5557         struct nfsrv_descript nfsd;
5558         struct nfsrv_descript *nd = &nfsd;
5559         int error;
5560
5561         nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0,
5562             0);
5563         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5564         *tl = newnfs_false;
5565         nd->nd_flag |= ND_USEGSSNAME;
5566         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5567             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5568         if (error != 0)
5569                 return (error);
5570         error = nd->nd_repstat;
5571         mbuf_freem(nd->nd_mrep);
5572         return (error);
5573 }
5574
5575 /*
5576  * Initialize the slot tables for a session.
5577  */
5578 static void
5579 nfscl_initsessionslots(struct nfsclsession *sep)
5580 {
5581         int i;
5582
5583         for (i = 0; i < NFSV4_CBSLOTS; i++) {
5584                 if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
5585                         m_freem(sep->nfsess_cbslots[i].nfssl_reply);
5586                 NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
5587         }
5588         for (i = 0; i < 64; i++)
5589                 sep->nfsess_slotseq[i] = 0;
5590         sep->nfsess_slots = 0;
5591 }
5592
5593 /*
5594  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
5595  */
5596 int
5597 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5598     uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
5599 {
5600         struct nfsnode *np = VTONFS(vp);
5601         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5602         struct nfscllayout *layp;
5603         struct nfscldevinfo *dip;
5604         struct nfsclflayout *rflp;
5605         struct mbuf *m;
5606         struct nfsclwritedsdorpc *drpc, *tdrpc;
5607         nfsv4stateid_t stateid;
5608         struct ucred *newcred;
5609         uint64_t lastbyte, len, off, oresid, xfer;
5610         int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo;
5611         void *lckp;
5612         uint8_t *dev;
5613         void *iovbase;
5614         size_t iovlen;
5615         off_t offs;
5616         ssize_t resid;
5617
5618         if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5619             (np->n_flag & NNOLAYOUT) != 0)
5620                 return (EIO);
5621         /* Now, get a reference cnt on the clientid for this mount. */
5622         if (nfscl_getref(nmp) == 0)
5623                 return (EIO);
5624
5625         /* Find an appropriate stateid. */
5626         newcred = NFSNEWCRED(cred);
5627         error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
5628             rwaccess, 1, newcred, p, &stateid, &lckp);
5629         if (error != 0) {
5630                 NFSFREECRED(newcred);
5631                 nfscl_relref(nmp);
5632                 return (error);
5633         }
5634         /* Search for a layout for this file. */
5635         off = uiop->uio_offset;
5636         layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
5637             np->n_fhp->nfh_len, off, &rflp, &recalled);
5638         if (layp == NULL || rflp == NULL) {
5639                 if (recalled != 0) {
5640                         NFSFREECRED(newcred);
5641                         nfscl_relref(nmp);
5642                         return (EIO);
5643                 }
5644                 if (layp != NULL) {
5645                         nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
5646                         layp = NULL;
5647                 }
5648                 /* Try and get a Layout, if it is supported. */
5649                 if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
5650                     (np->n_flag & NWRITEOPENED) != 0)
5651                         iolaymode = NFSLAYOUTIOMODE_RW;
5652                 else
5653                         iolaymode = NFSLAYOUTIOMODE_READ;
5654                 error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
5655                     NULL, &stateid, off, &layp, newcred, p);
5656                 if (error != 0) {
5657                         NFSLOCKNODE(np);
5658                         np->n_flag |= NNOLAYOUT;
5659                         NFSUNLOCKNODE(np);
5660                         if (lckp != NULL)
5661                                 nfscl_lockderef(lckp);
5662                         NFSFREECRED(newcred);
5663                         if (layp != NULL)
5664                                 nfscl_rellayout(layp, 0);
5665                         nfscl_relref(nmp);
5666                         return (error);
5667                 }
5668         }
5669
5670         /*
5671          * Loop around finding a layout that works for the first part of
5672          * this I/O operation, and then call the function that actually
5673          * does the RPC.
5674          */
5675         eof = 0;
5676         len = (uint64_t)uiop->uio_resid;
5677         while (len > 0 && error == 0 && eof == 0) {
5678                 off = uiop->uio_offset;
5679                 error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
5680                 if (error == 0) {
5681                         oresid = xfer = (uint64_t)uiop->uio_resid;
5682                         if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
5683                                 xfer = rflp->nfsfl_end - rflp->nfsfl_off;
5684                         /*
5685                          * For Flex File layout with mirrored DSs, select one
5686                          * of them at random for reads. For writes and commits,
5687                          * do all mirrors.
5688                          */
5689                         m = NULL;
5690                         drpc = NULL;
5691                         firstmirror = 0;
5692                         mirrorcnt = 1;
5693                         if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
5694                             (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
5695                                 if (rwaccess == NFSV4OPEN_ACCESSREAD) {
5696                                         firstmirror = arc4random() % mirrorcnt;
5697                                         mirrorcnt = firstmirror + 1;
5698                                 } else {
5699                                         if (docommit == 0) {
5700                                                 /*
5701                                                  * Save values, so uiop can be
5702                                                  * rolled back upon a write
5703                                                  * error.
5704                                                  */
5705                                                 offs = uiop->uio_offset;
5706                                                 resid = uiop->uio_resid;
5707                                                 iovbase =
5708                                                     uiop->uio_iov->iov_base;
5709                                                 iovlen = uiop->uio_iov->iov_len;
5710                                                 m = nfsm_uiombuflist(uiop, len,
5711                                                     NULL, NULL);
5712                                         }
5713                                         tdrpc = drpc = malloc(sizeof(*drpc) *
5714                                             (mirrorcnt - 1), M_TEMP, M_WAITOK |
5715                                             M_ZERO);
5716                                 }
5717                         }
5718                         for (i = firstmirror; i < mirrorcnt && error == 0; i++){
5719                                 if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0)
5720                                         dev = rflp->nfsfl_ffm[i].dev;
5721                                 else
5722                                         dev = rflp->nfsfl_dev;
5723                                 dip = nfscl_getdevinfo(nmp->nm_clp, dev,
5724                                     rflp->nfsfl_devp);
5725                                 if (dip != NULL) {
5726                                         if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
5727                                             != 0)
5728                                                 error = nfscl_dofflayoutio(vp,
5729                                                     uiop, iomode, must_commit,
5730                                                     &eof, &stateid, rwaccess,
5731                                                     dip, layp, rflp, off, xfer,
5732                                                     i, docommit, m, tdrpc,
5733                                                     newcred, p);
5734                                         else
5735                                                 error = nfscl_doflayoutio(vp,
5736                                                     uiop, iomode, must_commit,
5737                                                     &eof, &stateid, rwaccess,
5738                                                     dip, layp, rflp, off, xfer,
5739                                                     docommit, newcred, p);
5740                                         nfscl_reldevinfo(dip);
5741                                 } else
5742                                         error = EIO;
5743                                 tdrpc++;
5744                         }
5745                         if (m != NULL)
5746                                 m_freem(m);
5747                         tdrpc = drpc;
5748                         timo = hz / 50;         /* Wait for 20msec. */
5749                         if (timo < 1)
5750                                 timo = 1;
5751                         for (i = firstmirror; i < mirrorcnt - 1 &&
5752                             tdrpc != NULL; i++, tdrpc++) {
5753                                 /*
5754                                  * For the unused drpc entries, both inprog and
5755                                  * err == 0, so this loop won't break.
5756                                  */
5757                                 while (tdrpc->inprog != 0 && tdrpc->done == 0)
5758                                         tsleep(&tdrpc->tsk, PVFS, "clrpcio",
5759                                             timo);
5760                                 if (error == 0 && tdrpc->err != 0)
5761                                         error = tdrpc->err;
5762                         }
5763                         free(drpc, M_TEMP);
5764                         if (error == 0) {
5765                                 if (mirrorcnt > 1 && rwaccess ==
5766                                     NFSV4OPEN_ACCESSWRITE && docommit == 0) {
5767                                         NFSLOCKCLSTATE();
5768                                         layp->nfsly_flags |= NFSLY_WRITTEN;
5769                                         NFSUNLOCKCLSTATE();
5770                                 }
5771                                 lastbyte = off + xfer - 1;
5772                                 NFSLOCKCLSTATE();
5773                                 if (lastbyte > layp->nfsly_lastbyte)
5774                                         layp->nfsly_lastbyte = lastbyte;
5775                                 NFSUNLOCKCLSTATE();
5776                         } else if (error == NFSERR_OPENMODE &&
5777                             rwaccess == NFSV4OPEN_ACCESSREAD) {
5778                                 NFSLOCKMNT(nmp);
5779                                 nmp->nm_state |= NFSSTA_OPENMODE;
5780                                 NFSUNLOCKMNT(nmp);
5781                         } else
5782                                 error = EIO;
5783                         if (error == 0)
5784                                 len -= (oresid - (uint64_t)uiop->uio_resid);
5785                         else if (mirrorcnt > 1 && rwaccess ==
5786                             NFSV4OPEN_ACCESSWRITE && docommit == 0) {
5787                                 /*
5788                                  * In case the rpc gets retried, roll the
5789                                  * uio fields changed by nfsm_uiombuflist()
5790                                  * back.
5791                                  */
5792                                 uiop->uio_offset = offs;
5793                                 uiop->uio_resid = resid;
5794                                 uiop->uio_iov->iov_base = iovbase;
5795                                 uiop->uio_iov->iov_len = iovlen;
5796                         }
5797                 }
5798         }
5799         if (lckp != NULL)
5800                 nfscl_lockderef(lckp);
5801         NFSFREECRED(newcred);
5802         nfscl_rellayout(layp, 0);
5803         nfscl_relref(nmp);
5804         return (error);
5805 }
5806
5807 /*
5808  * Make a copy of the mbuf chain and add an mbuf for null padding, as required.
5809  */
5810 static struct mbuf *
5811 nfsm_copym(struct mbuf *m, int off, int xfer)
5812 {
5813         struct mbuf *m2, *m3, *m4;
5814         uint32_t *tl;
5815         int rem;
5816
5817         m2 = m_copym(m, off, xfer, M_WAITOK);
5818         rem = NFSM_RNDUP(xfer) - xfer;
5819         if (rem > 0) {
5820                 /*
5821                  * The zero padding to a multiple of 4 bytes is required by
5822                  * the XDR. So that the mbufs copied by reference aren't
5823                  * modified, add an mbuf with the zero'd bytes to the list.
5824                  * rem will be a maximum of 3, so one zero'd uint32_t is
5825                  * sufficient.
5826                  */
5827                 m3 = m2;
5828                 while (m3->m_next != NULL)
5829                         m3 = m3->m_next;
5830                 NFSMGET(m4);
5831                 tl = NFSMTOD(m4, uint32_t *);
5832                 *tl = 0;
5833                 mbuf_setlen(m4, rem);
5834                 mbuf_setnext(m3, m4);
5835         }
5836         return (m2);
5837 }
5838
5839 /*
5840  * Find a file layout that will handle the first bytes of the requested
5841  * range and return the information from it needed to the I/O operation.
5842  */
5843 int
5844 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
5845     struct nfsclflayout **retflpp)
5846 {
5847         struct nfsclflayout *flp, *nflp, *rflp;
5848         uint32_t rw;
5849
5850         rflp = NULL;
5851         rw = rwaccess;
5852         /* For reading, do the Read list first and then the Write list. */
5853         do {
5854                 if (rw == NFSV4OPEN_ACCESSREAD)
5855                         flp = LIST_FIRST(&lyp->nfsly_flayread);
5856                 else
5857                         flp = LIST_FIRST(&lyp->nfsly_flayrw);
5858                 while (flp != NULL) {
5859                         nflp = LIST_NEXT(flp, nfsfl_list);
5860                         if (flp->nfsfl_off > off)
5861                                 break;
5862                         if (flp->nfsfl_end > off &&
5863                             (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
5864                                 rflp = flp;
5865                         flp = nflp;
5866                 }
5867                 if (rw == NFSV4OPEN_ACCESSREAD)
5868                         rw = NFSV4OPEN_ACCESSWRITE;
5869                 else
5870                         rw = 0;
5871         } while (rw != 0);
5872         if (rflp != NULL) {
5873                 /* This one covers the most bytes starting at off. */
5874                 *retflpp = rflp;
5875                 return (0);
5876         }
5877         return (EIO);
5878 }
5879
5880 /*
5881  * Do I/O using an NFSv4.1 file layout.
5882  */
5883 static int
5884 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5885     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
5886     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
5887     uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
5888 {
5889         uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
5890         int commit_thru_mds, error, stripe_index, stripe_pos;
5891         struct nfsnode *np;
5892         struct nfsfh *fhp;
5893         struct nfsclds **dspp;
5894
5895         np = VTONFS(vp);
5896         rel_off = off - flp->nfsfl_patoff;
5897         stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff;
5898         stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
5899             dp->nfsdi_stripecnt;
5900         transfer = stripe_unit_size - (rel_off % stripe_unit_size);
5901         error = 0;
5902
5903         /* Loop around, doing I/O for each stripe unit. */
5904         while (len > 0 && error == 0) {
5905                 stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
5906                 dspp = nfsfldi_addr(dp, stripe_index);
5907                 if (len > transfer && docommit == 0)
5908                         xfer = transfer;
5909                 else
5910                         xfer = len;
5911                 if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
5912                         /* Dense layout. */
5913                         if (stripe_pos >= flp->nfsfl_fhcnt)
5914                                 return (EIO);
5915                         fhp = flp->nfsfl_fh[stripe_pos];
5916                         io_off = (rel_off / (stripe_unit_size *
5917                             dp->nfsdi_stripecnt)) * stripe_unit_size +
5918                             rel_off % stripe_unit_size;
5919                 } else {
5920                         /* Sparse layout. */
5921                         if (flp->nfsfl_fhcnt > 1) {
5922                                 if (stripe_index >= flp->nfsfl_fhcnt)
5923                                         return (EIO);
5924                                 fhp = flp->nfsfl_fh[stripe_index];
5925                         } else if (flp->nfsfl_fhcnt == 1)
5926                                 fhp = flp->nfsfl_fh[0];
5927                         else
5928                                 fhp = np->n_fhp;
5929                         io_off = off;
5930                 }
5931                 if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
5932                         commit_thru_mds = 1;
5933                         if (docommit != 0)
5934                                 error = EIO;
5935                 } else {
5936                         commit_thru_mds = 0;
5937                         mtx_lock(&np->n_mtx);
5938                         np->n_flag |= NDSCOMMIT;
5939                         mtx_unlock(&np->n_mtx);
5940                 }
5941                 if (docommit != 0) {
5942                         if (error == 0)
5943                                 error = nfsrpc_commitds(vp, io_off, xfer,
5944                                     *dspp, fhp, 0, 0, cred, p);
5945                         if (error == 0) {
5946                                 /*
5947                                  * Set both eof and uio_resid = 0 to end any
5948                                  * loops.
5949                                  */
5950                                 *eofp = 1;
5951                                 uiop->uio_resid = 0;
5952                         } else {
5953                                 mtx_lock(&np->n_mtx);
5954                                 np->n_flag &= ~NDSCOMMIT;
5955                                 mtx_unlock(&np->n_mtx);
5956                         }
5957                 } else if (rwflag == NFSV4OPEN_ACCESSREAD)
5958                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
5959                             io_off, xfer, fhp, 0, 0, 0, cred, p);
5960                 else {
5961                         error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
5962                             stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
5963                             0, 0, 0, cred, p);
5964                         if (error == 0) {
5965                                 NFSLOCKCLSTATE();
5966                                 lyp->nfsly_flags |= NFSLY_WRITTEN;
5967                                 NFSUNLOCKCLSTATE();
5968                         }
5969                 }
5970                 if (error == 0) {
5971                         transfer = stripe_unit_size;
5972                         stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
5973                         len -= xfer;
5974                         off += xfer;
5975                 }
5976         }
5977         return (error);
5978 }
5979
5980 /*
5981  * Do I/O using an NFSv4.1 flex file layout.
5982  */
5983 static int
5984 nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5985     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
5986     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
5987     uint64_t len, int mirror, int docommit, struct mbuf *mp,
5988     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
5989 {
5990         uint64_t transfer, xfer;
5991         int error, rel_off;
5992         struct nfsnode *np;
5993         struct nfsfh *fhp;
5994         struct nfsclds **dspp;
5995         struct ucred *tcred;
5996         struct mbuf *m;
5997
5998         np = VTONFS(vp);
5999         error = 0;
6000         rel_off = 0;
6001         NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
6002             (uintmax_t)len);
6003         /* Loop around, doing I/O for each stripe unit. */
6004         while (len > 0 && error == 0) {
6005                 dspp = nfsfldi_addr(dp, 0);
6006                 fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
6007                 stateidp = &flp->nfsfl_ffm[mirror].st;
6008                 NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
6009                     mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
6010                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
6011                         tcred = NFSNEWCRED(cred);
6012                         tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
6013                         tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
6014                         tcred->cr_ngroups = 1;
6015                 } else
6016                         tcred = cred;
6017                 if (rwflag == NFSV4OPEN_ACCESSREAD)
6018                         transfer = dp->nfsdi_rsize;
6019                 else
6020                         transfer = dp->nfsdi_wsize;
6021                 mtx_lock(&np->n_mtx);
6022                 np->n_flag |= NDSCOMMIT;
6023                 mtx_unlock(&np->n_mtx);
6024                 if (len > transfer && docommit == 0)
6025                         xfer = transfer;
6026                 else
6027                         xfer = len;
6028                 if (docommit != 0) {
6029                         if (error == 0) {
6030                                 /*
6031                                  * Do last mirrored DS commit with this thread.
6032                                  */
6033                                 if (mirror < flp->nfsfl_mirrorcnt - 1)
6034                                         error = nfsio_commitds(vp, off, xfer,
6035                                             *dspp, fhp, dp->nfsdi_vers,
6036                                             dp->nfsdi_minorvers, drpc, tcred,
6037                                             p);
6038                                 else
6039                                         error = nfsrpc_commitds(vp, off, xfer,
6040                                             *dspp, fhp, dp->nfsdi_vers,
6041                                             dp->nfsdi_minorvers, tcred, p);
6042                                 NFSCL_DEBUG(4, "commitds=%d\n", error);
6043                                 if (nfsds_failerr(error)) {
6044                                         NFSCL_DEBUG(4,
6045                                             "DS layreterr for commit\n");
6046                                         nfscl_dserr(NFSV4OP_COMMIT, dp, lyp);
6047                                 }
6048                         }
6049                         NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error);
6050                         if (error == 0) {
6051                                 /*
6052                                  * Set both eof and uio_resid = 0 to end any
6053                                  * loops.
6054                                  */
6055                                 *eofp = 1;
6056                                 uiop->uio_resid = 0;
6057                         } else {
6058                                 mtx_lock(&np->n_mtx);
6059                                 np->n_flag &= ~NDSCOMMIT;
6060                                 mtx_unlock(&np->n_mtx);
6061                         }
6062                 } else if (rwflag == NFSV4OPEN_ACCESSREAD) {
6063                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6064                             off, xfer, fhp, 1, dp->nfsdi_vers,
6065                             dp->nfsdi_minorvers, tcred, p);
6066                         NFSCL_DEBUG(4, "readds=%d\n", error);
6067                         if (nfsds_failerr(error)) {
6068                                 NFSCL_DEBUG(4, "DS layreterr for read\n");
6069                                 nfscl_dserr(NFSV4OP_READ, dp, lyp);
6070                         }
6071                 } else {
6072                         if (flp->nfsfl_mirrorcnt == 1) {
6073                                 error = nfsrpc_writeds(vp, uiop, iomode,
6074                                     must_commit, stateidp, *dspp, off, xfer,
6075                                     fhp, 0, 1, dp->nfsdi_vers,
6076                                     dp->nfsdi_minorvers, tcred, p);
6077                                 if (error == 0) {
6078                                         NFSLOCKCLSTATE();
6079                                         lyp->nfsly_flags |= NFSLY_WRITTEN;
6080                                         NFSUNLOCKCLSTATE();
6081                                 }
6082                         } else {
6083                                 m = nfsm_copym(mp, rel_off, xfer);
6084                                 NFSCL_DEBUG(4, "mcopy reloff=%d xfer=%jd\n",
6085                                     rel_off, (uintmax_t)xfer);
6086                                 /*
6087                                  * Do last write to a mirrored DS with this
6088                                  * thread.
6089                                  */
6090                                 if (mirror < flp->nfsfl_mirrorcnt - 1)
6091                                         error = nfsio_writedsmir(vp, iomode,
6092                                             must_commit, stateidp, *dspp, off,
6093                                             xfer, fhp, m, dp->nfsdi_vers,
6094                                             dp->nfsdi_minorvers, drpc, tcred,
6095                                             p);
6096                                 else
6097                                         error = nfsrpc_writedsmir(vp, iomode,
6098                                             must_commit, stateidp, *dspp, off,
6099                                             xfer, fhp, m, dp->nfsdi_vers,
6100                                             dp->nfsdi_minorvers, tcred, p);
6101                                 NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error);
6102                                 if (nfsds_failerr(error)) {
6103                                         NFSCL_DEBUG(4,
6104                                             "DS layreterr for write\n");
6105                                         nfscl_dserr(NFSV4OP_WRITE, dp, lyp);
6106                                 }
6107                         }
6108                 }
6109                 NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
6110                 if (error == 0) {
6111                         len -= xfer;
6112                         off += xfer;
6113                         rel_off += xfer;
6114                 }
6115                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
6116                         NFSFREECRED(tcred);
6117         }
6118         NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
6119         return (error);
6120 }
6121
6122 /*
6123  * The actual read RPC done to a DS.
6124  */
6125 static int
6126 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
6127     struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
6128     int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
6129 {
6130         uint32_t *tl;
6131         int attrflag, error, retlen;
6132         struct nfsrv_descript nfsd;
6133         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
6134         struct nfsrv_descript *nd = &nfsd;
6135         struct nfssockreq *nrp;
6136         struct nfsvattr na;
6137
6138         nd->nd_mrep = NULL;
6139         if (vers == 0 || vers == NFS_VER4) {
6140                 nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
6141                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6142                 vers = NFS_VER4;
6143                 NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
6144                 if (flex != 0)
6145                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6146                 else
6147                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6148         } else {
6149                 nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
6150                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6151                 NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
6152         }
6153         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
6154         txdr_hyper(io_off, tl);
6155         *(tl + 2) = txdr_unsigned(len);
6156         nrp = dsp->nfsclds_sockp;
6157         NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
6158         if (nrp == NULL)
6159                 /* If NULL, use the MDS socket. */
6160                 nrp = &nmp->nm_sockreq;
6161         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6162             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6163         NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
6164             error);
6165         if (error != 0)
6166                 return (error);
6167         if (vers == NFS_VER3) {
6168                 error = nfscl_postop_attr(nd, &na, &attrflag, NULL);
6169                 NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
6170                 if (error != 0)
6171                         goto nfsmout;
6172         }
6173         if (nd->nd_repstat != 0) {
6174                 error = nd->nd_repstat;
6175                 goto nfsmout;
6176         }
6177         if (vers == NFS_VER3) {
6178                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6179                 *eofp = fxdr_unsigned(int, *(tl + 1));
6180         } else {
6181                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6182                 *eofp = fxdr_unsigned(int, *tl);
6183         }
6184         NFSM_STRSIZ(retlen, len);
6185         NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
6186         error = nfsm_mbufuio(nd, uiop, retlen);
6187 nfsmout:
6188         if (nd->nd_mrep != NULL)
6189                 mbuf_freem(nd->nd_mrep);
6190         return (error);
6191 }
6192
6193 /*
6194  * The actual write RPC done to a DS.
6195  */
6196 static int
6197 nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6198     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6199     struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
6200     struct ucred *cred, NFSPROC_T *p)
6201 {
6202         uint32_t *tl;
6203         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
6204         int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
6205         int32_t backup;
6206         struct nfsrv_descript nfsd;
6207         struct nfsrv_descript *nd = &nfsd;
6208         struct nfssockreq *nrp;
6209         struct nfsvattr na;
6210
6211         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
6212         nd->nd_mrep = NULL;
6213         if (vers == 0 || vers == NFS_VER4) {
6214                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6215                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6216                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
6217                 vers = NFS_VER4;
6218                 if (flex != 0)
6219                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6220                 else
6221                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6222                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6223         } else {
6224                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6225                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6226                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
6227                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6228         }
6229         txdr_hyper(io_off, tl);
6230         tl += 2;
6231         if (vers == NFS_VER3)
6232                 *tl++ = txdr_unsigned(len);
6233         *tl++ = txdr_unsigned(*iomode);
6234         *tl = txdr_unsigned(len);
6235         nfsm_uiombuf(nd, uiop, len);
6236         nrp = dsp->nfsclds_sockp;
6237         if (nrp == NULL)
6238                 /* If NULL, use the MDS socket. */
6239                 nrp = &nmp->nm_sockreq;
6240         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6241             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6242         NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
6243             nd->nd_repstat);
6244         if (error != 0)
6245                 return (error);
6246         if (nd->nd_repstat != 0) {
6247                 /*
6248                  * In case the rpc gets retried, roll
6249                  * the uio fileds changed by nfsm_uiombuf()
6250                  * back.
6251                  */
6252                 uiop->uio_offset -= len;
6253                 uio_uio_resid_add(uiop, len);
6254                 uio_iov_base_add(uiop, -len);
6255                 uio_iov_len_add(uiop, len);
6256                 error = nd->nd_repstat;
6257         } else {
6258                 if (vers == NFS_VER3) {
6259                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6260                             NULL);
6261                         NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
6262                         if (error != 0)
6263                                 goto nfsmout;
6264                 }
6265                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6266                 rlen = fxdr_unsigned(int, *tl++);
6267                 NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
6268                 if (rlen == 0) {
6269                         error = NFSERR_IO;
6270                         goto nfsmout;
6271                 } else if (rlen < len) {
6272                         backup = len - rlen;
6273                         uio_iov_base_add(uiop, -(backup));
6274                         uio_iov_len_add(uiop, backup);
6275                         uiop->uio_offset -= backup;
6276                         uio_uio_resid_add(uiop, backup);
6277                         len = rlen;
6278                 }
6279                 commit = fxdr_unsigned(int, *tl++);
6280
6281                 /*
6282                  * Return the lowest commitment level
6283                  * obtained by any of the RPCs.
6284                  */
6285                 if (committed == NFSWRITE_FILESYNC)
6286                         committed = commit;
6287                 else if (committed == NFSWRITE_DATASYNC &&
6288                     commit == NFSWRITE_UNSTABLE)
6289                         committed = commit;
6290                 if (commit_thru_mds != 0) {
6291                         NFSLOCKMNT(nmp);
6292                         if (!NFSHASWRITEVERF(nmp)) {
6293                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6294                                 NFSSETWRITEVERF(nmp);
6295                         } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
6296                                 *must_commit = 1;
6297                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6298                         }
6299                         NFSUNLOCKMNT(nmp);
6300                 } else {
6301                         NFSLOCKDS(dsp);
6302                         if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6303                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6304                                 dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6305                         } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6306                                 *must_commit = 1;
6307                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6308                         }
6309                         NFSUNLOCKDS(dsp);
6310                 }
6311         }
6312 nfsmout:
6313         if (nd->nd_mrep != NULL)
6314                 mbuf_freem(nd->nd_mrep);
6315         *iomode = committed;
6316         if (nd->nd_repstat != 0 && error == 0)
6317                 error = nd->nd_repstat;
6318         return (error);
6319 }
6320
6321 /*
6322  * The actual write RPC done to a DS.
6323  * This variant is called from a separate kernel process for mirrors.
6324  * Any short write is considered an IO error.
6325  */
6326 static int
6327 nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6328     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6329     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6330     struct ucred *cred, NFSPROC_T *p)
6331 {
6332         uint32_t *tl;
6333         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
6334         int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
6335         struct nfsrv_descript nfsd;
6336         struct nfsrv_descript *nd = &nfsd;
6337         struct nfssockreq *nrp;
6338         struct nfsvattr na;
6339
6340         nd->nd_mrep = NULL;
6341         if (vers == 0 || vers == NFS_VER4) {
6342                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6343                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6344                 vers = NFS_VER4;
6345                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
6346                     minorvers);
6347                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6348                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6349         } else {
6350                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6351                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6352                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
6353                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6354         }
6355         txdr_hyper(io_off, tl);
6356         tl += 2;
6357         if (vers == NFS_VER3)
6358                 *tl++ = txdr_unsigned(len);
6359         *tl++ = txdr_unsigned(*iomode);
6360         *tl = txdr_unsigned(len);
6361         if (len > 0) {
6362                 /* Put data in mbuf chain. */
6363                 nd->nd_mb->m_next = m;
6364                 /* Set nd_mb and nd_bpos to end of data. */
6365                 while (m->m_next != NULL)
6366                         m = m->m_next;
6367                 nd->nd_mb = m;
6368                 nd->nd_bpos = mtod(m, char *) + m->m_len;
6369                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: lastmb len=%d\n", m->m_len);
6370         }
6371         nrp = dsp->nfsclds_sockp;
6372         if (nrp == NULL)
6373                 /* If NULL, use the MDS socket. */
6374                 nrp = &nmp->nm_sockreq;
6375         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6376             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6377         NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
6378             nd->nd_repstat);
6379         if (error != 0)
6380                 return (error);
6381         if (nd->nd_repstat != 0)
6382                 error = nd->nd_repstat;
6383         else {
6384                 if (vers == NFS_VER3) {
6385                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6386                             NULL);
6387                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
6388                             error);
6389                         if (error != 0)
6390                                 goto nfsmout;
6391                 }
6392                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6393                 rlen = fxdr_unsigned(int, *tl++);
6394                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
6395                     rlen);
6396                 if (rlen != len) {
6397                         error = NFSERR_IO;
6398                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
6399                             len, rlen);
6400                         goto nfsmout;
6401                 }
6402                 commit = fxdr_unsigned(int, *tl++);
6403
6404                 /*
6405                  * Return the lowest commitment level
6406                  * obtained by any of the RPCs.
6407                  */
6408                 if (committed == NFSWRITE_FILESYNC)
6409                         committed = commit;
6410                 else if (committed == NFSWRITE_DATASYNC &&
6411                     commit == NFSWRITE_UNSTABLE)
6412                         committed = commit;
6413                 NFSLOCKDS(dsp);
6414                 if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6415                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6416                         dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6417                 } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6418                         *must_commit = 1;
6419                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6420                 }
6421                 NFSUNLOCKDS(dsp);
6422         }
6423 nfsmout:
6424         if (nd->nd_mrep != NULL)
6425                 mbuf_freem(nd->nd_mrep);
6426         *iomode = committed;
6427         if (nd->nd_repstat != 0 && error == 0)
6428                 error = nd->nd_repstat;
6429         return (error);
6430 }
6431
6432 /*
6433  * Start up the thread that will execute nfsrpc_writedsmir().
6434  */
6435 static void
6436 start_writedsmir(void *arg, int pending)
6437 {
6438         struct nfsclwritedsdorpc *drpc;
6439
6440         drpc = (struct nfsclwritedsdorpc *)arg;
6441         drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode,
6442             &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len,
6443             drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred,
6444             drpc->p);
6445         drpc->done = 1;
6446         NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err);
6447 }
6448
6449 /*
6450  * Set up the write DS mirror call for the pNFS I/O thread.
6451  */
6452 static int
6453 nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6454     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len,
6455     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6456     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6457 {
6458         int error, ret;
6459
6460         error = 0;
6461         drpc->done = 0;
6462         drpc->vp = vp;
6463         drpc->iomode = *iomode;
6464         drpc->must_commit = *must_commit;
6465         drpc->stateidp = stateidp;
6466         drpc->dsp = dsp;
6467         drpc->off = off;
6468         drpc->len = len;
6469         drpc->fhp = fhp;
6470         drpc->m = m;
6471         drpc->vers = vers;
6472         drpc->minorvers = minorvers;
6473         drpc->cred = cred;
6474         drpc->p = p;
6475         drpc->inprog = 0;
6476         ret = EIO;
6477         if (nfs_pnfsiothreads != 0) {
6478                 ret = nfs_pnfsio(start_writedsmir, drpc);
6479                 NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret);
6480         }
6481         if (ret != 0)
6482                 error = nfsrpc_writedsmir(vp, iomode, must_commit, stateidp,
6483                     dsp, off, len, fhp, m, vers, minorvers, cred, p);
6484         NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error);
6485         return (error);
6486 }
6487
6488 /*
6489  * Free up the nfsclds structure.
6490  */
6491 void
6492 nfscl_freenfsclds(struct nfsclds *dsp)
6493 {
6494         int i;
6495
6496         if (dsp == NULL)
6497                 return;
6498         if (dsp->nfsclds_sockp != NULL) {
6499                 NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
6500                 NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
6501                 free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
6502                 free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
6503         }
6504         NFSFREEMUTEX(&dsp->nfsclds_mtx);
6505         NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
6506         for (i = 0; i < NFSV4_CBSLOTS; i++) {
6507                 if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
6508                         m_freem(
6509                             dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
6510         }
6511         free(dsp, M_NFSCLDS);
6512 }
6513
6514 static enum nfsclds_state
6515 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
6516     struct nfsclds **retdspp)
6517 {
6518         struct nfsclds *dsp, *cur_dsp;
6519
6520         /*
6521          * Search the list of nfsclds structures for one with the same
6522          * server.
6523          */
6524         cur_dsp = NULL;
6525         TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
6526                 if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
6527                     dsp->nfsclds_servownlen != 0 &&
6528                     !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
6529                     dsp->nfsclds_servownlen) &&
6530                     dsp->nfsclds_sess.nfsess_defunct == 0) {
6531                         NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
6532                             TAILQ_FIRST(&nmp->nm_sess), dsp,
6533                             dsp->nfsclds_flags);
6534                         /* Server major id matches. */
6535                         if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
6536                                 *retdspp = dsp;
6537                                 return (NFSDSP_USETHISSESSION);
6538                         }
6539
6540                         /*
6541                          * Note the first match, so it can be used for
6542                          * sequence'ing new sessions.
6543                          */
6544                         if (cur_dsp == NULL)
6545                                 cur_dsp = dsp;
6546                 }
6547         }
6548         if (cur_dsp != NULL) {
6549                 *retdspp = cur_dsp;
6550                 return (NFSDSP_SEQTHISSESSION);
6551         }
6552         return (NFSDSP_NOTFOUND);
6553 }
6554
6555 /*
6556  * NFS commit rpc to a NFSv4.1 DS.
6557  */
6558 static int
6559 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
6560     struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
6561     NFSPROC_T *p)
6562 {
6563         uint32_t *tl;
6564         struct nfsrv_descript nfsd, *nd = &nfsd;
6565         struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
6566         struct nfssockreq *nrp;
6567         struct nfsvattr na;
6568         int attrflag, error;
6569         
6570         nd->nd_mrep = NULL;
6571         if (vers == 0 || vers == NFS_VER4) {
6572                 nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,
6573                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6574                 vers = NFS_VER4;
6575         } else
6576                 nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh,
6577                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6578         NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers,
6579             minorvers);
6580         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
6581         txdr_hyper(offset, tl);
6582         tl += 2;
6583         *tl = txdr_unsigned(cnt);
6584         nrp = dsp->nfsclds_sockp;
6585         if (nrp == NULL)
6586                 /* If NULL, use the MDS socket. */
6587                 nrp = &nmp->nm_sockreq;
6588         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6589             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6590         NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error,
6591             nd->nd_repstat);
6592         if (error != 0)
6593                 return (error);
6594         if (nd->nd_repstat == 0) {
6595                 if (vers == NFS_VER3) {
6596                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6597                             NULL);
6598                         NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error);
6599                         if (error != 0)
6600                                 goto nfsmout;
6601                 }
6602                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
6603                 NFSLOCKDS(dsp);
6604                 if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6605                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6606                         error = NFSERR_STALEWRITEVERF;
6607                 }
6608                 NFSUNLOCKDS(dsp);
6609         }
6610 nfsmout:
6611         if (error == 0 && nd->nd_repstat != 0)
6612                 error = nd->nd_repstat;
6613         mbuf_freem(nd->nd_mrep);
6614         return (error);
6615 }
6616
6617 /*
6618  * Start up the thread that will execute nfsrpc_commitds().
6619  */
6620 static void
6621 start_commitds(void *arg, int pending)
6622 {
6623         struct nfsclwritedsdorpc *drpc;
6624
6625         drpc = (struct nfsclwritedsdorpc *)arg;
6626         drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len,
6627             drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred,
6628             drpc->p);
6629         drpc->done = 1;
6630         NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err);
6631 }
6632
6633 /*
6634  * Set up the commit DS mirror call for the pNFS I/O thread.
6635  */
6636 static int
6637 nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
6638     struct nfsfh *fhp, int vers, int minorvers,
6639     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6640 {
6641         int error, ret;
6642
6643         error = 0;
6644         drpc->done = 0;
6645         drpc->vp = vp;
6646         drpc->off = offset;
6647         drpc->len = cnt;
6648         drpc->dsp = dsp;
6649         drpc->fhp = fhp;
6650         drpc->vers = vers;
6651         drpc->minorvers = minorvers;
6652         drpc->cred = cred;
6653         drpc->p = p;
6654         drpc->inprog = 0;
6655         ret = EIO;
6656         if (nfs_pnfsiothreads != 0) {
6657                 ret = nfs_pnfsio(start_commitds, drpc);
6658                 NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret);
6659         }
6660         if (ret != 0)
6661                 error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers,
6662                     minorvers, cred, p);
6663         NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error);
6664         return (error);
6665 }
6666
6667 /*
6668  * Set up the XDR arguments for the LayoutGet operation.
6669  */
6670 static void
6671 nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
6672     uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype,
6673     int layoutlen, int usecurstateid)
6674 {
6675         uint32_t *tl;
6676
6677         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
6678             NFSX_STATEID);
6679         *tl++ = newnfs_false;           /* Don't signal availability. */
6680         *tl++ = txdr_unsigned(layouttype);
6681         *tl++ = txdr_unsigned(iomode);
6682         txdr_hyper(offset, tl);
6683         tl += 2;
6684         txdr_hyper(len, tl);
6685         tl += 2;
6686         txdr_hyper(minlen, tl);
6687         tl += 2;
6688         if (usecurstateid != 0) {
6689                 /* Special stateid for Current stateid. */
6690                 *tl++ = txdr_unsigned(1);
6691                 *tl++ = 0;
6692                 *tl++ = 0;
6693                 *tl++ = 0;
6694         } else {
6695                 *tl++ = txdr_unsigned(stateidp->seqid);
6696                 NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
6697                 *tl++ = stateidp->other[0];
6698                 *tl++ = stateidp->other[1];
6699                 *tl++ = stateidp->other[2];
6700         }
6701         *tl = txdr_unsigned(layoutlen);
6702 }
6703
6704 /*
6705  * Parse the reply for a successful LayoutGet operation.
6706  */
6707 static int
6708 nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6709     int *retonclosep, struct nfsclflayouthead *flhp)
6710 {
6711         uint32_t *tl;
6712         struct nfsclflayout *flp, *prevflp, *tflp;
6713         int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen;
6714         int m, mirrorcnt;
6715         uint64_t retlen, off;
6716         struct nfsfh *nfhp;
6717         uint8_t *cp;
6718         uid_t user;
6719         gid_t grp;
6720
6721         NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n");
6722         error = 0;
6723         flp = NULL;
6724         gotiomode = -1;
6725         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
6726         if (*tl++ != 0)
6727                 *retonclosep = 1;
6728         else
6729                 *retonclosep = 0;
6730         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
6731         NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
6732             (int)stateidp->seqid);
6733         stateidp->other[0] = *tl++;
6734         stateidp->other[1] = *tl++;
6735         stateidp->other[2] = *tl++;
6736         cnt = fxdr_unsigned(int, *tl);
6737         NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
6738         if (cnt <= 0 || cnt > 10000) {
6739                 /* Don't accept more than 10000 layouts in reply. */
6740                 error = NFSERR_BADXDR;
6741                 goto nfsmout;
6742         }
6743         for (i = 0; i < cnt; i++) {
6744                 /* Dissect to the layout type. */
6745                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER +
6746                     3 * NFSX_UNSIGNED);
6747                 off = fxdr_hyper(tl); tl += 2;
6748                 retlen = fxdr_hyper(tl); tl += 2;
6749                 iomode = fxdr_unsigned(int, *tl++);
6750                 laytype = fxdr_unsigned(int, *tl);
6751                 NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype,
6752                     (uintmax_t)off, (uintmax_t)retlen, iomode);
6753                 /* Ignore length of layout body for now. */
6754                 if (laytype == NFSLAYOUT_NFSV4_1_FILES) {
6755                         /* Parse the File layout up to fhcnt. */
6756                         NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED +
6757                             NFSX_HYPER + NFSX_V4DEVICEID);
6758                         fhcnt = fxdr_unsigned(int, *(tl + 4 +
6759                             NFSX_V4DEVICEID / NFSX_UNSIGNED));
6760                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
6761                         if (fhcnt < 0 || fhcnt > 100) {
6762                                 /* Don't accept more than 100 file handles. */
6763                                 error = NFSERR_BADXDR;
6764                                 goto nfsmout;
6765                         }
6766                         if (fhcnt > 0)
6767                                 flp = malloc(sizeof(*flp) + fhcnt *
6768                                     sizeof(struct nfsfh *), M_NFSFLAYOUT,
6769                                     M_WAITOK);
6770                         else
6771                                 flp = malloc(sizeof(*flp), M_NFSFLAYOUT,
6772                                     M_WAITOK);
6773                         flp->nfsfl_flags = NFSFL_FILE;
6774                         flp->nfsfl_fhcnt = 0;
6775                         flp->nfsfl_devp = NULL;
6776                         flp->nfsfl_off = off;
6777                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
6778                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
6779                         else
6780                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
6781                         flp->nfsfl_iomode = iomode;
6782                         if (gotiomode == -1)
6783                                 gotiomode = flp->nfsfl_iomode;
6784                         /* Ignore layout body length for now. */
6785                         NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
6786                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
6787                         flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
6788                         NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
6789                         flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
6790                         flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
6791                         NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n",
6792                             flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff);
6793                         for (j = 0; j < fhcnt; j++) {
6794                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6795                                 nfhlen = fxdr_unsigned(int, *tl);
6796                                 if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
6797                                         error = NFSERR_BADXDR;
6798                                         goto nfsmout;
6799                                 }
6800                                 nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
6801                                     M_NFSFH, M_WAITOK);
6802                                 flp->nfsfl_fh[j] = nfhp;
6803                                 flp->nfsfl_fhcnt++;
6804                                 nfhp->nfh_len = nfhlen;
6805                                 NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
6806                                 NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
6807                         }
6808                 } else if (laytype == NFSLAYOUT_FLEXFILE) {
6809                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED +
6810                             NFSX_HYPER);
6811                         mirrorcnt = fxdr_unsigned(int, *(tl + 2));
6812                         NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt);
6813                         if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) {
6814                                 error = NFSERR_BADXDR;
6815                                 goto nfsmout;
6816                         }
6817                         flp = malloc(sizeof(*flp) + mirrorcnt *
6818                             sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK);
6819                         flp->nfsfl_flags = NFSFL_FLEXFILE;
6820                         flp->nfsfl_mirrorcnt = mirrorcnt;
6821                         flp->nfsfl_devp = NULL;
6822                         flp->nfsfl_off = off;
6823                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
6824                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
6825                         else
6826                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
6827                         flp->nfsfl_iomode = iomode;
6828                         if (gotiomode == -1)
6829                                 gotiomode = flp->nfsfl_iomode;
6830                         flp->nfsfl_stripeunit = fxdr_hyper(tl);
6831                         NFSCL_DEBUG(4, "stripeunit=%ju\n",
6832                             (uintmax_t)flp->nfsfl_stripeunit);
6833                         for (j = 0; j < mirrorcnt; j++) {
6834                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6835                                 k = fxdr_unsigned(int, *tl);
6836                                 if (k < 1 || k > 128) {
6837                                         error = NFSERR_BADXDR;
6838                                         goto nfsmout;
6839                                 }
6840                                 NFSCL_DEBUG(4, "servercnt=%d\n", k);
6841                                 for (l = 0; l < k; l++) {
6842                                         NFSM_DISSECT(tl, uint32_t *,
6843                                             NFSX_V4DEVICEID + NFSX_STATEID +
6844                                             2 * NFSX_UNSIGNED);
6845                                         if (l == 0) {
6846                                                 /* Just use the first server. */
6847                                                 NFSBCOPY(tl,
6848                                                     flp->nfsfl_ffm[j].dev,
6849                                                     NFSX_V4DEVICEID);
6850                                                 tl += (NFSX_V4DEVICEID /
6851                                                     NFSX_UNSIGNED);
6852                                                 tl++;
6853                                                 flp->nfsfl_ffm[j].st.seqid =
6854                                                     *tl++;
6855                                                 flp->nfsfl_ffm[j].st.other[0] =
6856                                                     *tl++;
6857                                                 flp->nfsfl_ffm[j].st.other[1] =
6858                                                     *tl++;
6859                                                 flp->nfsfl_ffm[j].st.other[2] =
6860                                                     *tl++;
6861                                                 NFSCL_DEBUG(4, "st.seqid=%u "
6862                                                  "st.o0=0x%x st.o1=0x%x "
6863                                                  "st.o2=0x%x\n",
6864                                                  flp->nfsfl_ffm[j].st.seqid,
6865                                                  flp->nfsfl_ffm[j].st.other[0],
6866                                                  flp->nfsfl_ffm[j].st.other[1],
6867                                                  flp->nfsfl_ffm[j].st.other[2]);
6868                                         } else
6869                                                 tl += ((NFSX_V4DEVICEID +
6870                                                     NFSX_STATEID +
6871                                                     NFSX_UNSIGNED) /
6872                                                     NFSX_UNSIGNED);
6873                                         fhcnt = fxdr_unsigned(int, *tl);
6874                                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
6875                                         if (fhcnt < 1 ||
6876                                             fhcnt > NFSDEV_MAXVERS) {
6877                                                 error = NFSERR_BADXDR;
6878                                                 goto nfsmout;
6879                                         }
6880                                         for (m = 0; m < fhcnt; m++) {
6881                                                 NFSM_DISSECT(tl, uint32_t *,
6882                                                     NFSX_UNSIGNED);
6883                                                 nfhlen = fxdr_unsigned(int,
6884                                                     *tl);
6885                                                 NFSCL_DEBUG(4, "nfhlen=%d\n",
6886                                                     nfhlen);
6887                                                 if (nfhlen <= 0 || nfhlen >
6888                                                     NFSX_V4FHMAX) {
6889                                                         error = NFSERR_BADXDR;
6890                                                         goto nfsmout;
6891                                                 }
6892                                                 NFSM_DISSECT(cp, uint8_t *,
6893                                                     NFSM_RNDUP(nfhlen));
6894                                                 if (l == 0) {
6895                                                         flp->nfsfl_ffm[j].fhcnt 
6896                                                             = fhcnt;
6897                                                         nfhp = malloc(
6898                                                             sizeof(*nfhp) +
6899                                                             nfhlen - 1, M_NFSFH,
6900                                                             M_WAITOK);
6901                                                         flp->nfsfl_ffm[j].fh[m]
6902                                                             = nfhp;
6903                                                         nfhp->nfh_len = nfhlen;
6904                                                         NFSBCOPY(cp,
6905                                                             nfhp->nfh_fh,
6906                                                             nfhlen);
6907                                                         NFSCL_DEBUG(4,
6908                                                             "got fh\n");
6909                                                 }
6910                                         }
6911                                         /* Now, get the ffsd_user/ffds_group. */
6912                                         error = nfsrv_parseug(nd, 0, &user,
6913                                             &grp, curthread);
6914                                         NFSCL_DEBUG(4, "after parseu=%d\n",
6915                                             error);
6916                                         if (error == 0)
6917                                                 error = nfsrv_parseug(nd, 1,
6918                                                     &user, &grp, curthread);
6919                                         NFSCL_DEBUG(4, "aft parseg=%d\n",
6920                                             grp);
6921                                         if (error != 0)
6922                                                 goto nfsmout;
6923                                         NFSCL_DEBUG(4, "user=%d group=%d\n",
6924                                             user, grp);
6925                                         if (l == 0) {
6926                                                 flp->nfsfl_ffm[j].user = user;
6927                                                 flp->nfsfl_ffm[j].group = grp;
6928                                                 NFSCL_DEBUG(4,
6929                                                     "usr=%d grp=%d\n", user,
6930                                                     grp);
6931                                         }
6932                                 }
6933                         }
6934                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6935                         flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++);
6936                         flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl);
6937                         NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n",
6938                             flp->nfsfl_fflags, flp->nfsfl_statshint);
6939                 } else {
6940                         error = NFSERR_BADXDR;
6941                         goto nfsmout;
6942                 }
6943                 if (flp->nfsfl_iomode == gotiomode) {
6944                         /* Keep the list in increasing offset order. */
6945                         tflp = LIST_FIRST(flhp);
6946                         prevflp = NULL;
6947                         while (tflp != NULL &&
6948                             tflp->nfsfl_off < flp->nfsfl_off) {
6949                                 prevflp = tflp;
6950                                 tflp = LIST_NEXT(tflp, nfsfl_list);
6951                         }
6952                         if (prevflp == NULL)
6953                                 LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
6954                         else
6955                                 LIST_INSERT_AFTER(prevflp, flp,
6956                                     nfsfl_list);
6957                         NFSCL_DEBUG(4, "flp inserted\n");
6958                 } else {
6959                         printf("nfscl_layoutget(): got wrong iomode\n");
6960                         nfscl_freeflayout(flp);
6961                 }
6962                 flp = NULL;
6963         }
6964 nfsmout:
6965         NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error);
6966         if (error != 0 && flp != NULL)
6967                 nfscl_freeflayout(flp);
6968         return (error);
6969 }
6970
6971 /*
6972  * Parse a user/group digit string.
6973  */
6974 static int
6975 nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
6976     NFSPROC_T *p)
6977 {
6978         uint32_t *tl;
6979         char *cp, *str, str0[NFSV4_SMALLSTR + 1];
6980         uint32_t len = 0;
6981         int error = 0;
6982
6983         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6984         len = fxdr_unsigned(uint32_t, *tl);
6985         str = NULL;
6986         if (len > NFSV4_OPAQUELIMIT) {
6987                 error = NFSERR_BADXDR;
6988                 goto nfsmout;
6989         }
6990         NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len);
6991         if (len == 0) {
6992                 if (dogrp != 0)
6993                         *gidp = GID_NOGROUP;
6994                 else
6995                         *uidp = UID_NOBODY;
6996                 return (0);
6997         }
6998         if (len > NFSV4_SMALLSTR)
6999                 str = malloc(len + 1, M_TEMP, M_WAITOK);
7000         else
7001                 str = str0;
7002         NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
7003         NFSBCOPY(cp, str, len);
7004         str[len] = '\0';
7005         NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
7006         if (dogrp != 0)
7007                 error = nfsv4_strtogid(nd, str, len, gidp, p);
7008         else
7009                 error = nfsv4_strtouid(nd, str, len, uidp, p);
7010 nfsmout:
7011         if (len > NFSV4_SMALLSTR)
7012                 free(str, M_TEMP);
7013         NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error);
7014         return (error);
7015 }
7016
7017 /*
7018  * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
7019  * so that it does both an Open and a Layoutget.
7020  */
7021 static int
7022 nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7023     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7024     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7025     struct ucred *cred, NFSPROC_T *p)
7026 {
7027         struct nfscllayout *lyp;
7028         struct nfsclflayout *flp;
7029         struct nfsclflayouthead flh;
7030         int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
7031         int layouttype, laystat;
7032         nfsv4stateid_t stateid;
7033         struct nfsclsession *tsep;
7034
7035         error = 0;
7036         if (NFSHASFLEXFILE(nmp))
7037                 layouttype = NFSLAYOUT_FLEXFILE;
7038         else
7039                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
7040         /*
7041          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
7042          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
7043          * flp == NULL.
7044          */
7045         lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp,
7046             &recalled);
7047         NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
7048         if (lyp == NULL)
7049                 islocked = 0;
7050         else if (flp != NULL)
7051                 islocked = 1;
7052         else
7053                 islocked = 2;
7054         if ((lyp == NULL || flp == NULL) && recalled == 0) {
7055                 LIST_INIT(&flh);
7056                 tsep = nfsmnt_mdssession(nmp);
7057                 layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
7058                     3 * NFSX_UNSIGNED);
7059                 if (lyp == NULL)
7060                         usecurstateid = 1;
7061                 else {
7062                         usecurstateid = 0;
7063                         stateid.seqid = lyp->nfsly_stateid.seqid;
7064                         stateid.other[0] = lyp->nfsly_stateid.other[0];
7065                         stateid.other[1] = lyp->nfsly_stateid.other[1];
7066                         stateid.other[2] = lyp->nfsly_stateid.other[2];
7067                 }
7068                 error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
7069                     newfhp, newfhlen, mode, op, name, namelen,
7070                     dpp, &stateid, usecurstateid, layouttype, layoutlen,
7071                     &retonclose, &flh, &laystat, cred, p);
7072                 NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
7073                     laystat, error);
7074                 laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
7075                     &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat,
7076                     &islocked, cred, p);
7077         } else
7078                 error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
7079                     mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
7080         if (islocked == 2)
7081                 nfscl_rellayout(lyp, 1);
7082         else if (islocked == 1)
7083                 nfscl_rellayout(lyp, 0);
7084         return (error);
7085 }
7086
7087 /*
7088  * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
7089  * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
7090  * handled by nfsrpc_openrpc().
7091  * For the case where op == NULL, dvp is the directory.  When op != NULL, it
7092  * can be NULL.
7093  */
7094 static int
7095 nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7096     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7097     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7098     nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
7099     int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
7100     int *laystatp, struct ucred *cred, NFSPROC_T *p)
7101 {
7102         uint32_t *tl;
7103         struct nfsrv_descript nfsd, *nd = &nfsd;
7104         struct nfscldeleg *ndp = NULL;
7105         struct nfsvattr nfsva;
7106         struct nfsclsession *tsep;
7107         uint32_t rflags, deleg;
7108         nfsattrbit_t attrbits;
7109         int error, ret, acesize, limitby, iomode;
7110
7111         *dpp = NULL;
7112         *laystatp = ENXIO;
7113         nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
7114             0, 0);
7115         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
7116         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
7117         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
7118         *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
7119         tsep = nfsmnt_mdssession(nmp);
7120         *tl++ = tsep->nfsess_clientid.lval[0];
7121         *tl = tsep->nfsess_clientid.lval[1];
7122         nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7123         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7124         *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
7125         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7126         nfsm_strtom(nd, name, namelen);
7127         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7128         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7129         NFSZERO_ATTRBIT(&attrbits);
7130         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
7131         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
7132         nfsrv_putattrbit(nd, &attrbits);
7133         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7134         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7135         if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
7136                 iomode = NFSLAYOUTIOMODE_RW;
7137         else
7138                 iomode = NFSLAYOUTIOMODE_READ;
7139         nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
7140             layouttype, layoutlen, usecurstateid);
7141         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
7142             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
7143         if (error != 0)
7144                 return (error);
7145         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
7146         if (nd->nd_repstat != 0)
7147                 *laystatp = nd->nd_repstat;
7148         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7149                 /* ND_NOMOREDATA will be set if the Open operation failed. */
7150                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7151                     6 * NFSX_UNSIGNED);
7152                 op->nfso_stateid.seqid = *tl++;
7153                 op->nfso_stateid.other[0] = *tl++;
7154                 op->nfso_stateid.other[1] = *tl++;
7155                 op->nfso_stateid.other[2] = *tl;
7156                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
7157                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7158                 if (error != 0)
7159                         goto nfsmout;
7160                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7161                 deleg = fxdr_unsigned(u_int32_t, *tl);
7162                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
7163                     deleg == NFSV4OPEN_DELEGATEWRITE) {
7164                         if (!(op->nfso_own->nfsow_clp->nfsc_flags &
7165                               NFSCLFLAGS_FIRSTDELEG))
7166                                 op->nfso_own->nfsow_clp->nfsc_flags |=
7167                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
7168                         ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
7169                             M_NFSCLDELEG, M_WAITOK);
7170                         LIST_INIT(&ndp->nfsdl_owner);
7171                         LIST_INIT(&ndp->nfsdl_lock);
7172                         ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
7173                         ndp->nfsdl_fhlen = newfhlen;
7174                         NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
7175                         newnfs_copyincred(cred, &ndp->nfsdl_cred);
7176                         nfscl_lockinit(&ndp->nfsdl_rwlock);
7177                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7178                             NFSX_UNSIGNED);
7179                         ndp->nfsdl_stateid.seqid = *tl++;
7180                         ndp->nfsdl_stateid.other[0] = *tl++;
7181                         ndp->nfsdl_stateid.other[1] = *tl++;
7182                         ndp->nfsdl_stateid.other[2] = *tl++;
7183                         ret = fxdr_unsigned(int, *tl);
7184                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
7185                                 ndp->nfsdl_flags = NFSCLDL_WRITE;
7186                                 /*
7187                                  * Indicates how much the file can grow.
7188                                  */
7189                                 NFSM_DISSECT(tl, u_int32_t *,
7190                                     3 * NFSX_UNSIGNED);
7191                                 limitby = fxdr_unsigned(int, *tl++);
7192                                 switch (limitby) {
7193                                 case NFSV4OPEN_LIMITSIZE:
7194                                         ndp->nfsdl_sizelimit = fxdr_hyper(tl);
7195                                         break;
7196                                 case NFSV4OPEN_LIMITBLOCKS:
7197                                         ndp->nfsdl_sizelimit =
7198                                             fxdr_unsigned(u_int64_t, *tl++);
7199                                         ndp->nfsdl_sizelimit *=
7200                                             fxdr_unsigned(u_int64_t, *tl);
7201                                         break;
7202                                 default:
7203                                         error = NFSERR_BADXDR;
7204                                         goto nfsmout;
7205                                 };
7206                         } else
7207                                 ndp->nfsdl_flags = NFSCLDL_READ;
7208                         if (ret != 0)
7209                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
7210                         error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
7211                             &acesize, p);
7212                         if (error != 0)
7213                                 goto nfsmout;
7214                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
7215                         error = NFSERR_BADXDR;
7216                         goto nfsmout;
7217                 }
7218                 if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
7219                     nfscl_assumeposixlocks)
7220                         op->nfso_posixlock = 1;
7221                 else
7222                         op->nfso_posixlock = 0;
7223                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7224                 /* If the 2nd element == NFS_OK, the Getattr succeeded. */
7225                 if (*++tl == 0) {
7226                         error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
7227                             NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
7228                             NULL, NULL, NULL, p, cred);
7229                         if (error != 0)
7230                                 goto nfsmout;
7231                         if (ndp != NULL) {
7232                                 ndp->nfsdl_change = nfsva.na_filerev;
7233                                 ndp->nfsdl_modtime = nfsva.na_mtime;
7234                                 ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7235                                 *dpp = ndp;
7236                                 ndp = NULL;
7237                         }
7238                         /*
7239                          * At this point, the Open has succeeded, so set
7240                          * nd_repstat = NFS_OK.  If the Layoutget failed,
7241                          * this function just won't return a layout.
7242                          */
7243                         if (nd->nd_repstat == 0) {
7244                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7245                                 *laystatp = fxdr_unsigned(int, *++tl);
7246                                 if (*laystatp == 0) {
7247                                         error = nfsrv_parselayoutget(nd,
7248                                             stateidp, retonclosep, flhp);
7249                                         if (error != 0)
7250                                                 *laystatp = error;
7251                                 }
7252                         } else
7253                                 nd->nd_repstat = 0;     /* Return 0 for Open. */
7254                 }
7255         }
7256         if (nd->nd_repstat != 0 && error == 0)
7257                 error = nd->nd_repstat;
7258 nfsmout:
7259         free(ndp, M_NFSCLDELEG);
7260         mbuf_freem(nd->nd_mrep);
7261         return (error);
7262 }
7263
7264 /*
7265  * Similar to nfsrpc_createv4(), but also does the LayoutGet operation.
7266  * Used only for mounts with pNFS enabled.
      *
      * The request built here carries an Open (with create) of "name" in dvp,
      * followed by SaveFH, GetFH, Getattr, PutFH of the directory, Getattr,
      * RestoreFH and LayoutGet.
      *
      * Results handed back to the caller:
      *  *nfhpp, *nnap, *attrflagp - filled in by nfscl_mtofh() for the new file.
      *  *dnap, *dattrflagp - directory attributes; *dattrflagp is set to 1
      *      once they have been loaded.
      *  *dpp - the delegation, when one was issued and 0 is returned; on any
      *      error return the delegation structure is freed instead.
      *  *unlockedp - set to 1 after nfscl_openrelease() has been called.
      *  *laystatp - starts as ENXIO, becomes nd_repstat when the compound
      *      failed, and otherwise the LayoutGet status word (or the error from
      *      nfsrv_parselayoutget()).  It is 0 only when a layout was parsed
      *      into flhp, with stateidp and retonclosep filled in by the parser.
      *
      * Returns 0 when the Open succeeded (even if a later operation in the
      * compound, such as the LayoutGet, did not), ENAMETOOLONG without sending
      * anything when namelen exceeds NFS_MAXNAMLEN, or the error from the RPC
      * or from parsing the reply.
7267  */
7268 static int
7269 nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7270     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7271     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7272     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7273     int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp,
7274     int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
7275     struct nfsclflayouthead *flhp, int *laystatp)
7276 {
7277         uint32_t *tl;
7278         int error = 0, deleg, newone, ret, acesize, limitby;
7279         struct nfsrv_descript nfsd, *nd = &nfsd;
7280         struct nfsclopen *op;
7281         struct nfscldeleg *dp = NULL;
7282         struct nfsnode *np;
7283         struct nfsfh *nfhp;
7284         struct nfsclsession *tsep;
7285         nfsattrbit_t attrbits;
7286         nfsv4stateid_t stateid;
7287         struct nfsmount *nmp;
7288
7289         nmp = VFSTONFS(dvp->v_mount);
7290         np = VTONFS(dvp);
             /* Default layout status; kept unless the reply says otherwise. */
7291         *laystatp = ENXIO;
7292         *unlockedp = 0;
7293         *nfhpp = NULL;
7294         *dpp = NULL;
7295         *attrflagp = 0;
7296         *dattrflagp = 0;
7297         if (namelen > NFS_MAXNAMLEN)
7298                 return (ENAMETOOLONG);
7299         NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp);
7300         /*
7301          * For V4, this is actually an Open op.
7302          */
7303         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
7304         *tl++ = txdr_unsigned(owp->nfsow_seqid);
7305         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
7306             NFSV4OPEN_ACCESSREAD);
7307         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
7308         tsep = nfsmnt_mdssession(nmp);
7309         *tl++ = tsep->nfsess_clientid.lval[0];
7310         *tl = tsep->nfsess_clientid.lval[1];
7311         nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7312         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7313         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
             /*
              * Pick the create mode: O_EXCL maps to GUARDED or EXCLUSIVE4_1
              * (the latter carries cverf), everything else to UNCHECKED.
              */
7314         if ((fmode & O_EXCL) != 0) {
7315                 if (NFSHASSESSPERSIST(nmp)) {
7316                         /* Use GUARDED for persistent sessions. */
7317                         *tl = txdr_unsigned(NFSCREATE_GUARDED);
7318                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
7319                 } else {
7320                         /* Otherwise, use EXCLUSIVE4_1. */
7321                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
7322                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
7323                         *tl++ = cverf.lval[0];
7324                         *tl = cverf.lval[1];
7325                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
7326                 }
7327         } else {
7328                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
7329                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
7330         }
7331         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7332         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7333         nfsm_strtom(nd, name, namelen);
7334         /* Get the new file's handle and attributes, plus save the FH. */
7335         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
7336         *tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
7337         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
7338         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7339         NFSGETATTR_ATTRBIT(&attrbits);
7340         nfsrv_putattrbit(nd, &attrbits);
7341         /* Get the directory's post-op attributes. */
7342         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7343         *tl = txdr_unsigned(NFSV4OP_PUTFH);
7344         nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
7345         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7346         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7347         nfsrv_putattrbit(nd, &attrbits);
             /* Switch back to the saved (new file) FH and ask for its layout. */
7348         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7349         *tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
7350         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7351         nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
7352             layouttype, layoutlen, usecurstateid);
7353         error = nfscl_request(nd, dvp, p, cred, dstuff);
             /* This path returns directly and does not go through nfsmout. */
7354         if (error != 0)
7355                 return (error);
7356         NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
7357             error);
             /*
              * A failed compound is recorded as the layout status as well; it
              * is only replaced if the LayoutGet status is reached below.
              */
7358         if (nd->nd_repstat != 0)
7359                 *laystatp = nd->nd_repstat;
7360         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
7361         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7362                 NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
                     /*
                      * NOTE(review): NFSM_DISSECT presumably bails out to
                      * nfsmout with error set when the reply is too short;
                      * confirm against the macro definition.
                      */
7363                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7364                     6 * NFSX_UNSIGNED);
                     /* Keep the open stateid until the Open structure exists. */
7365                 stateid.seqid = *tl++;
7366                 stateid.other[0] = *tl++;
7367                 stateid.other[1] = *tl++;
7368                 stateid.other[2] = *tl;
7369                 nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7370                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7371                 deleg = fxdr_unsigned(int, *tl);
7372                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
7373                     deleg == NFSV4OPEN_DELEGATEWRITE) {
7374                         if (!(owp->nfsow_clp->nfsc_flags &
7375                               NFSCLFLAGS_FIRSTDELEG))
7376                                 owp->nfsow_clp->nfsc_flags |=
7377                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
                             /*
                              * dp stays owned by this function until nfsmout,
                              * where it is either passed back via *dpp or freed.
                              */
7378                         dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
7379                             M_NFSCLDELEG, M_WAITOK);
7380                         LIST_INIT(&dp->nfsdl_owner);
7381                         LIST_INIT(&dp->nfsdl_lock);
7382                         dp->nfsdl_clp = owp->nfsow_clp;
7383                         newnfs_copyincred(cred, &dp->nfsdl_cred);
7384                         nfscl_lockinit(&dp->nfsdl_rwlock);
7385                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7386                             NFSX_UNSIGNED);
7387                         dp->nfsdl_stateid.seqid = *tl++;
7388                         dp->nfsdl_stateid.other[0] = *tl++;
7389                         dp->nfsdl_stateid.other[1] = *tl++;
7390                         dp->nfsdl_stateid.other[2] = *tl++;
                             /* Word after the stateid; non-zero marks a recall. */
7391                         ret = fxdr_unsigned(int, *tl);
7392                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
7393                                 dp->nfsdl_flags = NFSCLDL_WRITE;
7394                                 /*
7395                                  * Indicates how much the file can grow.
7396                                  */
7397                                 NFSM_DISSECT(tl, u_int32_t *,
7398                                     3 * NFSX_UNSIGNED);
7399                                 limitby = fxdr_unsigned(int, *tl++);
7400                                 switch (limitby) {
7401                                 case NFSV4OPEN_LIMITSIZE:
7402                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
7403                                         break;
7404                                 case NFSV4OPEN_LIMITBLOCKS:
                                             /* Limit is the product of the two words. */
7405                                         dp->nfsdl_sizelimit =
7406                                             fxdr_unsigned(u_int64_t, *tl++);
7407                                         dp->nfsdl_sizelimit *=
7408                                             fxdr_unsigned(u_int64_t, *tl);
7409                                         break;
7410                                 default:
7411                                         error = NFSERR_BADXDR;
7412                                         goto nfsmout;
7413                                 };
7414                         } else {
7415                                 dp->nfsdl_flags = NFSCLDL_READ;
7416                         }
7417                         if (ret != 0)
7418                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
7419                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
7420                             &acesize, p);
7421                         if (error != 0)
7422                                 goto nfsmout;
7423                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
7424                         error = NFSERR_BADXDR;
7425                         goto nfsmout;
7426                 }
7427
7428                 /* Now, we should have the status for the SaveFH. */
                     /*
                      * Only the second of the two dissected words is examined;
                      * zero means the SaveFH worked.
                      * NOTE(review): the first word is presumably the op number.
                      */
7429                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7430                 if (*++tl == 0) {
7431                         NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
7432                         /*
7433                          * Now, process the GetFH and Getattr for the newly
7434                          * created file. nfscl_mtofh() will set
7435                          * ND_NOMOREDATA if these weren't successful.
7436                          */
7437                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
7438                         NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
7439                         if (error != 0)
7440                                 goto nfsmout;
7441                 } else
7442                         nd->nd_flag |= ND_NOMOREDATA;
7443                 /* Now we have the PutFH and Getattr for the directory. */
                     /*
                      * Same two-word check for each of them; the first non-zero
                      * second word stops all further parsing of the reply.
                      */
7444                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7445                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7446                         if (*++tl != 0)
7447                                 nd->nd_flag |= ND_NOMOREDATA;
7448                         else {
7449                                 NFSM_DISSECT(tl, uint32_t *, 2 *
7450                                     NFSX_UNSIGNED);
7451                                 if (*++tl != 0)
7452                                         nd->nd_flag |= ND_NOMOREDATA;
7453                         }
7454                 }
7455                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7456                         /* Load the directory attributes. */
7457                         error = nfsm_loadattr(nd, dnap);
7458                         NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
7459                         if (error != 0)
7460                                 goto nfsmout;
7461                         *dattrflagp = 1;
                             /* Seed the delegation from the new file's attributes. */
7462                         if (dp != NULL && *attrflagp != 0) {
7463                                 dp->nfsdl_change = nnap->na_filerev;
7464                                 dp->nfsdl_modtime = nnap->na_mtime;
7465                                 dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7466                         }
7467                         /*
7468                          * We can now complete the Open state.
7469                          */
7470                         nfhp = *nfhpp;
7471                         if (dp != NULL) {
7472                                 dp->nfsdl_fhlen = nfhp->nfh_len;
7473                                 NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
7474                                     nfhp->nfh_len);
7475                         }
7476                         /*
7477                          * Get an Open structure that will be
7478                          * attached to the OpenOwner, acquired already.
7479                          */
7480                         error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
7481                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
7482                             cred, p, NULL, &op, &newone, NULL, 0);
7483                         if (error != 0)
7484                                 goto nfsmout;
7485                         op->nfso_stateid = stateid;
7486                         newnfs_copyincred(cred, &op->nfso_cred);
7487         
                             /* Tell the caller that the release has been done here. */
7488                         nfscl_openrelease(nmp, op, error, newone);
7489                         *unlockedp = 1;
7490
7491                         /* Now, handle the RestoreFH and LayoutGet. */
                             /*
                              * Four words are consumed and the last one is used
                              * as the LayoutGet status.  A parse failure is
                              * reported both through error and *laystatp.
                              */
7492                         if (nd->nd_repstat == 0) {
7493                                 NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
7494                                 *laystatp = fxdr_unsigned(int, *(tl + 3));
7495                                 if (*laystatp == 0) {
7496                                         error = nfsrv_parselayoutget(nd,
7497                                             stateidp, retonclosep, flhp);
7498                                         if (error != 0)
7499                                                 *laystatp = error;
7500                                 }
7501                                 NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
7502                                     error);
7503                         } else
                                     /*
                                      * The Open itself worked, so report success for
                                      * it; *laystatp already holds the failure.
                                      */
7504                                 nd->nd_repstat = 0;
7505                 }
7506         }
7507         if (nd->nd_repstat != 0 && error == 0)
7508                 error = nd->nd_repstat;
7509         if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION)
7510                 nfscl_initiate_recovery(owp->nfsow_clp);
             /* Common exit: settle ownership of dp and free the reply. */
7511 nfsmout:
7512         NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
7513         if (error == 0)
7514                 *dpp = dp;
7515         else
7516                 free(dp, M_NFSCLDELEG);
7517         mbuf_freem(nd->nd_mrep);
7518         return (error);
7519 }
7520
7521 /*
7522  * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
7523  */
7524 static int
7525 nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7526     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7527     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7528     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7529     int *dattrflagp, void *dstuff, int *unlockedp)
7530 {
7531         struct nfscllayout *lyp;
7532         struct nfsclflayouthead flh;
7533         struct nfsfh *nfhp;
7534         struct nfsclsession *tsep;
7535         struct nfsmount *nmp;
7536         nfsv4stateid_t stateid;
7537         int error, layoutlen, layouttype, retonclose, laystat;
7538
7539         error = 0;
7540         nmp = VFSTONFS(dvp->v_mount);
7541         if (NFSHASFLEXFILE(nmp))
7542                 layouttype = NFSLAYOUT_FLEXFILE;
7543         else
7544                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
7545         LIST_INIT(&flh);
7546         tsep = nfsmnt_mdssession(nmp);
7547         layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
7548         error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
7549             owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
7550             dstuff, unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
7551             &flh, &laystat);
7552         NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
7553             laystat, error);
7554         lyp = NULL;
7555         if (laystat == 0) {
7556                 nfhp = *nfhpp;
7557                 laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
7558                     nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
7559                     layouttype, laystat, NULL, cred, p);
7560         } else
7561                 laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
7562                     retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL,
7563                     cred, p);
7564         if (laystat == 0)
7565                 nfscl_rellayout(lyp, 0);
7566         return (error);
7567 }
7568
7569 /*
7570  * Process the results of a layoutget() operation.
7571  */
7572 static int
7573 nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
7574     int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
7575     struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
7576     int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
7577 {
7578         struct nfsclflayout *tflp;
7579         struct nfscldevinfo *dip;
7580         uint8_t *dev;
7581
7582         if (laystat == NFSERR_UNKNLAYOUTTYPE) {
7583                 NFSLOCKMNT(nmp);
7584                 if (!NFSHASFLEXFILE(nmp)) {
7585                         /* Switch to using Flex File Layout. */
7586                         nmp->nm_state |= NFSSTA_FLEXFILE;
7587                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
7588                         /* Disable pNFS. */
7589                         NFSCL_DEBUG(1, "disable PNFS\n");
7590                         nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
7591                 }
7592                 NFSUNLOCKMNT(nmp);
7593         }
7594         if (laystat == 0) {
7595                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
7596                 LIST_FOREACH(tflp, flhp, nfsfl_list) {
7597                         laystat = nfscl_adddevinfo(nmp, NULL, tflp);
7598                         NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
7599                         if (laystat != 0) {
7600                                 if (layouttype == NFSLAYOUT_FLEXFILE)
7601                                         dev = tflp->nfsfl_ffm[0].dev;
7602                                 else
7603                                         dev = tflp->nfsfl_dev;
7604                                 laystat = nfsrpc_getdeviceinfo(nmp, dev,
7605                                     layouttype, notifybit, &dip, cred, p);
7606                                 NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
7607                                     laystat);
7608                                 if (laystat != 0)
7609                                         break;
7610                                 laystat = nfscl_adddevinfo(nmp, dip, tflp);
7611                                 if (laystat != 0)
7612                                         printf("getlayout: cannot add\n");
7613                         }
7614                 }
7615         }
7616         if (laystat == 0) {
7617                 /*
7618                  * nfscl_layout() always returns with the nfsly_lock
7619                  * set to a refcnt (shared lock).
7620                  * Passing in dvp is sufficient, since it is only used to
7621                  * get the fsid for the file system.
7622                  */
7623                 laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
7624                     layouttype, retonclose, flhp, lypp, cred, p);
7625                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
7626                     laystat);
7627                 if (laystat == 0 && islockedp != NULL)
7628                         *islockedp = 1;
7629         }
7630         return (laystat);
7631 }
7632