]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/nfsclient/nfs_clrpcops.c
Upgrade to version 3.1.5
[FreeBSD/FreeBSD.git] / sys / fs / nfsclient / nfs_clrpcops.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 /*
40  * Rpc op calls, generally called from the vnode op calls or through the
41  * buffer cache, for NFS v2, 3 and 4.
42  * These do not normally make any changes to vnode arguments or use
43  * structures that might change between the VFS variants. The returned
44  * arguments are all at the end, after the NFSPROC_T *p one.
45  */
46
47 #include "opt_inet6.h"
48
49 #include <fs/nfs/nfsport.h>
50 #include <fs/nfsclient/nfs.h>
51 #include <sys/extattr.h>
52 #include <sys/sysctl.h>
53 #include <sys/taskqueue.h>
54
/* The tunables below hang off the _vfs_nfs sysctl node declared elsewhere. */
SYSCTL_DECL(_vfs_nfs);

/*
 * Read/write integer tunable "ignore_eexist" (initially 0).  Per its
 * description string, it controls ignoring EEXIST replies for
 * mkdir/symlink; the code consulting it is not in this part of the file.
 */
static int      nfsignore_eexist = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
    &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");

/*
 * Read/write integer tunable "dssameconn" (initially 0).  Per its
 * description string, it selects use of the same TCP connection to
 * multiple DSs.
 */
static int      nfscl_dssameconn = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, dssameconn, CTLFLAG_RW,
    &nfscl_dssameconn, 0, "Use same TCP connection to multiple DSs");
64
/*
 * Global variables.
 * The "extern" ones are defined in other files; the rest are defined here.
 */
/* Checked by nfsrpc_open(): a layout is only requested when non-zero. */
extern int nfs_numnfscbd;
extern struct timeval nfsboottime;
extern u_int32_t newnfs_false, newnfs_true;
extern nfstype nfsv34_type[9];
extern int nfsrv_useacl;
extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
extern int nfscl_debuglevel;
extern int nfs_pnfsiothreads;
extern u_long sb_max_adj;
extern int nfs_maxcopyrange;
/* NOTE(review): macro; presumably declares the client state mutex - confirm. */
NFSCLSTATEMUTEX;
int nfstest_outofseq = 0;
/*
 * When non-zero, nfsrpc_openrpc() marks every open as supporting POSIX
 * locks, whether or not the server set NFSV4OPEN_LOCKTYPEPOSIX.
 */
int nfscl_assumeposixlocks = 1;
/* Checked by nfsrpc_open(): a layout is only requested when non-zero. */
int nfscl_enablecallb = 0;
/* Initialized to the NFSV4_CBPORT constant. */
short nfsv4_cbport = NFSV4_CBPORT;
int nfstest_openallsetattr = 0;

/* Byte offset of d_name within struct dirent, i.e. the fixed header size. */
#define DIRHDSIZ        offsetof(struct dirent, d_name)
86
/*
 * Result codes of nfscl_getsameserver(), which returns exactly one of:
 * NFSDSP_USETHISSESSION - Use this session for the DS.
 * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new
 *     session.
 * NFSDSP_NOTFOUND - No matching server was found.
 */
enum nfsclds_state {
        NFSDSP_USETHISSESSION = 0,      /* reuse the matching DS session */
        NFSDSP_SEQTHISSESSION = 1,      /* new session, seeded from the dsp */
        NFSDSP_NOTFOUND = 2,            /* no matching server */
};
99
/*
 * Do a write RPC on a DS data file, using this structure for the arguments,
 * so that this function can be executed by a separate kernel process.
 *
 * NOTE(review): the fields from "vp" through "p" appear to mirror the
 * parameter list of nfsrpc_writedsmir() (see its prototype); the code that
 * fills in and consumes the structure is not in this part of the file, so
 * the per-field notes below should be confirmed against it.
 */
struct nfsclwritedsdorpc {
        int                     done;           /* presumably set on completion */
        int                     inprog;         /* presumably set while queued/running */
        struct task             tsk;            /* task handle (see <sys/taskqueue.h>) */
        struct vnode            *vp;            /* vnode argument */
        int                     iomode;         /* iomode argument/result */
        int                     must_commit;    /* must_commit argument/result */
        nfsv4stateid_t          *stateidp;      /* stateid argument */
        struct nfsclds          *dsp;           /* DS to talk to */
        uint64_t                off;            /* file offset */
        int                     len;            /* byte count */
#ifdef notyet
        int                     advise;
#endif
        struct nfsfh            *fhp;           /* file handle argument */
        struct mbuf             *m;             /* mbuf chain argument */
        int                     vers;           /* NFS version argument */
        int                     minorvers;      /* NFS minor version argument */
        struct ucred            *cred;          /* credentials argument */
        NFSPROC_T               *p;             /* process/thread argument */
        int                     err;            /* presumably the RPC's result */
};
126
127 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
128     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
129 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
130     nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *);
131 static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
132     struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
133     void *);
134 static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
135     nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
136     struct nfsvattr *, struct nfsfh **, int *, int *, void *);
137 static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
138     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
139     NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
140     int *, void *, int *);
141 static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
142     struct nfscllockowner *, u_int64_t, u_int64_t,
143     u_int32_t, struct ucred *, NFSPROC_T *, int);
144 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
145     struct acl *, nfsv4stateid_t *, void *);
146 static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
147     uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
148     struct ucred *, NFSPROC_T *);
149 static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
150     struct sockaddr_in6 *, sa_family_t, int, int, struct nfsclds **,
151     NFSPROC_T *);
152 static void nfscl_initsessionslots(struct nfsclsession *);
153 static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
154     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
155     struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
156     NFSPROC_T *);
157 static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
158     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
159     struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
160     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
161 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
162     struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
163     struct ucred *, NFSPROC_T *);
164 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
165     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
166     struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
167 static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
168     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
169     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
170 static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
171     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
172     struct ucred *, NFSPROC_T *);
173 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
174     struct nfsclds *, struct nfsclds **, uint32_t *);
175 static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
176     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
177     NFSPROC_T *);
178 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
179     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
180 #ifdef notyet
181 static int nfsio_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
182     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
183     NFSPROC_T *);
184 static int nfsrpc_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
185     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
186 #endif
187 static int nfsrpc_allocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
188     struct nfsvattr *, int *, struct ucred *, NFSPROC_T *, void *);
189 static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
190     uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
191 static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
192     NFSPROC_T *);
193 static int nfsrv_parselayoutget(struct nfsmount *, struct nfsrv_descript *,
194     nfsv4stateid_t *, int *, struct nfsclflayouthead *);
195 static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
196     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
197     struct nfscldeleg **, struct ucred *, NFSPROC_T *);
198 static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
199     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
200     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
201     struct nfsfh **, int *, int *, void *, int *);
202 static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
203     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
204     struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
205     struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
206 static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
207     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
208     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
209     struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *,
210     int, int, int, int *, struct nfsclflayouthead *, int *);
211 static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
212     uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
213     struct nfsclflayouthead *, struct ucred *, NFSPROC_T *, void *);
214 static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
215     int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
216     struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
217 static int nfsrpc_copyrpc(vnode_t, off_t, vnode_t, off_t, size_t *,
218     nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *,
219     struct nfsvattr *, int *, bool, int *, struct ucred *, NFSPROC_T *);
220 static int nfsrpc_seekrpc(vnode_t, off_t *, nfsv4stateid_t *, bool *,
221     int, struct nfsvattr *, int *, struct ucred *);
222 static struct mbuf *nfsm_split(struct mbuf *, uint64_t);
223
224 int nfs_pnfsio(task_fn_t *, void *);
225
226 /*
227  * nfs null call from vfs.
228  */
229 int
230 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
231 {
232         int error;
233         struct nfsrv_descript nfsd, *nd = &nfsd;
234         
235         NFSCL_REQSTART(nd, NFSPROC_NULL, vp);
236         error = nfscl_request(nd, vp, p, cred, NULL);
237         if (nd->nd_repstat && !error)
238                 error = nd->nd_repstat;
239         m_freem(nd->nd_mrep);
240         return (error);
241 }
242
243 /*
244  * nfs access rpc op.
245  * For nfs version 3 and 4, use the access rpc to check accessibility. If file
246  * modes are changed on the server, accesses might still fail later.
247  */
248 int
249 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
250     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
251 {
252         int error;
253         u_int32_t mode, rmode;
254
255         if (acmode & VREAD)
256                 mode = NFSACCESS_READ;
257         else
258                 mode = 0;
259         if (vnode_vtype(vp) == VDIR) {
260                 if (acmode & VWRITE)
261                         mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
262                                  NFSACCESS_DELETE);
263                 if (acmode & VEXEC)
264                         mode |= NFSACCESS_LOOKUP;
265         } else {
266                 if (acmode & VWRITE)
267                         mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
268                 if (acmode & VEXEC)
269                         mode |= NFSACCESS_EXECUTE;
270         }
271
272         /*
273          * Now, just call nfsrpc_accessrpc() to do the actual RPC.
274          */
275         error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode,
276             NULL);
277
278         /*
279          * The NFS V3 spec does not clarify whether or not
280          * the returned access bits can be a superset of
281          * the ones requested, so...
282          */
283         if (!error && (rmode & mode) != mode)
284                 error = EACCES;
285         return (error);
286 }
287
288 /*
289  * The actual rpc, separated out for Darwin.
290  */
291 int
292 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
293     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep,
294     void *stuff)
295 {
296         u_int32_t *tl;
297         u_int32_t supported, rmode;
298         int error;
299         struct nfsrv_descript nfsd, *nd = &nfsd;
300         nfsattrbit_t attrbits;
301
302         *attrflagp = 0;
303         supported = mode;
304         NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp);
305         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
306         *tl = txdr_unsigned(mode);
307         if (nd->nd_flag & ND_NFSV4) {
308                 /*
309                  * And do a Getattr op.
310                  */
311                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
312                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
313                 NFSGETATTR_ATTRBIT(&attrbits);
314                 (void) nfsrv_putattrbit(nd, &attrbits);
315         }
316         error = nfscl_request(nd, vp, p, cred, stuff);
317         if (error)
318                 return (error);
319         if (nd->nd_flag & ND_NFSV3) {
320                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
321                 if (error)
322                         goto nfsmout;
323         }
324         if (!nd->nd_repstat) {
325                 if (nd->nd_flag & ND_NFSV4) {
326                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
327                         supported = fxdr_unsigned(u_int32_t, *tl++);
328                 } else {
329                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
330                 }
331                 rmode = fxdr_unsigned(u_int32_t, *tl);
332                 if (nd->nd_flag & ND_NFSV4)
333                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
334
335                 /*
336                  * It's not obvious what should be done about
337                  * unsupported access modes. For now, be paranoid
338                  * and clear the unsupported ones.
339                  */
340                 rmode &= supported;
341                 *rmodep = rmode;
342         } else
343                 error = nd->nd_repstat;
344 nfsmout:
345         m_freem(nd->nd_mrep);
346         return (error);
347 }
348
/*
 * nfs open rpc
 *
 * Acquire (or reuse) NFSv4 open state for vp with the access implied by the
 * FREAD/FWRITE bits in amode.  Vnodes that are not regular files return 0
 * without doing anything.
 *
 * Each pass of the loop asks nfscl_open() for the open structure and, when
 * it answers NFSCLOPEN_DOOPEN, performs the Open against the server, either
 * with nfsrpc_openrpc() or, on the first attempt of a pNFS capable mount,
 * with nfsrpc_getopenlayout().  The loop repeats after a nap for
 * NFSERR_GRACE, NFSERR_STALECLIENTID, NFSERR_STALEDONTRECOVER, NFSERR_DELAY
 * and NFSERR_BADSESSION, and up to four times for NFSERR_EXPIRED or
 * NFSERR_BADSTATEID as long as nfscl_hasexpired() returns 0.
 *
 * Returns 0 on success, the error from nfscl_open() or from the Open, or
 * EIO when the expired/bad stateid retries are used up or the node has no
 * n_v4 data.
 */
int
nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
{
        struct nfsclopen *op;
        struct nfscldeleg *dp;
        struct nfsfh *nfhp;
        struct nfsnode *np = VTONFS(vp);
        struct nfsmount *nmp = VFSTONFS(vp->v_mount);
        u_int32_t mode, clidrev;
        int ret, newone, error, expireret = 0, retrycnt;

        /*
         * For NFSv4, Open Ops are only done on Regular Files.
         */
        if (vnode_vtype(vp) != VREG)
                return (0);
        /* Translate the open flags into NFSv4 share access bits. */
        mode = 0;
        if (amode & FREAD)
                mode |= NFSV4OPEN_ACCESSREAD;
        if (amode & FWRITE)
                mode |= NFSV4OPEN_ACCESSWRITE;
        nfhp = np->n_fhp;

        retrycnt = 0;
#ifdef notdef
{ char name[100]; int namel;
namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
bcopy(NFS4NODENAME(np->n_v4), name, namel);
name[namel] = '\0';
printf("rpcopen p=0x%x name=%s",p->p_pid,name);
if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
else printf(" fhl=0\n");
}
#endif
        do {
            dp = NULL;
            error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
                cred, p, NULL, &op, &newone, &ret, 1);
            if (error) {
                return (error);
            }
            /*
             * Remember the clientid revision in use for this attempt; it
             * is handed to nfscl_hasexpired() below.  Zero disables the
             * expired/bad stateid retry.
             */
            if (nmp->nm_clp != NULL)
                clidrev = nmp->nm_clp->nfsc_clientidrev;
            else
                clidrev = 0;
            if (ret == NFSCLOPEN_DOOPEN) {
                if (np->n_v4 != NULL) {
                        /*
                         * For the first attempt, try and get a layout, if
                         * pNFS is enabled for the mount.
                         */
                        if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
                            nfs_numnfscbd == 0 ||
                            (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
                                error = nfsrpc_openrpc(nmp, vp,
                                    np->n_v4->n4_data,
                                    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
                                    np->n_fhp->nfh_len, mode, op,
                                    NFS4NODENAME(np->n_v4),
                                    np->n_v4->n4_namelen,
                                    &dp, 0, 0x0, cred, p, 0, 0);
                        else
                                error = nfsrpc_getopenlayout(nmp, vp,
                                    np->n_v4->n4_data,
                                    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
                                    np->n_fhp->nfh_len, mode, op,
                                    NFS4NODENAME(np->n_v4),
                                    np->n_v4->n4_namelen, &dp, cred, p);
                        /* A delegation came back with the Open. */
                        if (dp != NULL) {
#ifdef APPLE
                                OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag);
#else
                                NFSLOCKNODE(np);
                                np->n_flag &= ~NDELEGMOD;
                                /*
                                 * Invalidate the attribute cache, so that
                                 * attributes that pre-date the issue of a
                                 * delegation are not cached, since the
                                 * cached attributes will remain valid while
                                 * the delegation is held.
                                 */
                                NFSINVALATTRCACHE(np);
                                NFSUNLOCKNODE(np);
#endif
                                (void) nfscl_deleg(nmp->nm_mountp,
                                    op->nfso_own->nfsow_clp,
                                    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
                        }
                } else {
                        /* Without the n_v4 data no Open can be built. */
                        error = EIO;
                }
                newnfs_copyincred(cred, &op->nfso_cred);
            } else if (ret == NFSCLOPEN_SETCRED)
                /*
                 * This is a new local open on a delegation. It needs
                 * to have credentials so that an open can be done
                 * against the server during recovery.
                 */
                newnfs_copyincred(cred, &op->nfso_cred);

            /*
             * nfso_opencnt is the count of how many VOP_OPEN()s have
             * been done on this Open successfully and a VOP_CLOSE()
             * is expected for each of these.
             * If error is non-zero, don't increment it, since the Open
             * hasn't succeeded yet.
             */
            if (!error)
                op->nfso_opencnt++;
            nfscl_openrelease(nmp, op, error, newone);
            /*
             * Nap before retrying the errors that are always retried;
             * for an expired or bad stateid, check with
             * nfscl_hasexpired() and count the attempt instead.
             */
            if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
                error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
                error == NFSERR_BADSESSION) {
                (void) nfs_catnap(PZERO, error, "nfs_open");
            } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
                && clidrev != 0) {
                expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
                retrycnt++;
            }
        } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
            error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
            error == NFSERR_BADSESSION ||
            ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
             expireret == 0 && clidrev != 0 && retrycnt < 4));
        /* Out of expired/bad stateid retries: report a plain I/O error. */
        if (error && retrycnt >= 4)
                error = EIO;
        return (error);
}
480
/*
 * the actual open rpc
 *
 * Build and send an NFSv4 Open (no create) followed by a Getattr for the
 * change and modify time attributes, then parse the reply into "op".
 *
 * nfhp/fhlen	file handle the request is started with
 * newfhp/newfhlen file handle copied into a new delegation and passed to
 *		nfsrpc_openconfirm()
 * mode		share access bits, with the deny bits above NFSLCK_SHIFT
 * name/namelen	component name, sent for every claim type except reclaim
 * dpp		in: a delegation to claim (NFSV4OPEN_CLAIMDELEGATECUR) or
 *		NULL; out: set to NULL on entry and, only when 0 is
 *		returned, to a delegation allocated here (M_NFSCLDELEG) or
 *		NULL if none was issued
 * reclaim	non-zero selects NFSV4OPEN_CLAIMPREVIOUS with delegtype
 * syscred	non-zero sets ND_USEGSSNAME on the request
 * recursed	non-zero suppresses the second Open done after an
 *		OpenConfirm
 *
 * Returns 0, an error from the request or reply parsing, or the reply
 * status.  For NFSERR_STALECLIENTID, nfscl_initiate_recovery() is called
 * before returning.
 */
int
nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
    u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
    u_int8_t *name, int namelen, struct nfscldeleg **dpp,
    int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
    int syscred, int recursed)
{
        u_int32_t *tl;
        struct nfsrv_descript nfsd, *nd = &nfsd;
        struct nfscldeleg *dp, *ndp = NULL;
        struct nfsvattr nfsva;
        u_int32_t rflags, deleg;
        nfsattrbit_t attrbits;
        int error, ret, acesize, limitby;
        struct nfsclsession *tsep;

        /* Take the caller's delegation (if any) and clear the result. */
        dp = *dpp;
        *dpp = NULL;
        nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
            false);
        /* Open-owner seqid, share access, share deny and the clientid. */
        NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
        *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
        *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
        *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
        tsep = nfsmnt_mdssession(nmp);
        *tl++ = tsep->nfsess_clientid.lval[0];
        *tl = tsep->nfsess_clientid.lval[1];
        (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
        /* Open type (never create here) followed by the claim type. */
        NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
        *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
        if (reclaim) {
                *tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
                NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
                *tl = txdr_unsigned(delegtype);
        } else {
                if (dp != NULL) {
                        *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
                        NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
                        /* NFSHASNFSV4N() mounts send a zero seqid. */
                        if (NFSHASNFSV4N(nmp))
                                *tl++ = 0;
                        else
                                *tl++ = dp->nfsdl_stateid.seqid;
                        *tl++ = dp->nfsdl_stateid.other[0];
                        *tl++ = dp->nfsdl_stateid.other[1];
                        *tl = dp->nfsdl_stateid.other[2];
                } else {
                        *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
                }
                (void) nfsm_strtom(nd, name, namelen);
        }
        /* Follow the Open with a Getattr for change and modify time. */
        NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
        *tl = txdr_unsigned(NFSV4OP_GETATTR);
        NFSZERO_ATTRBIT(&attrbits);
        NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
        NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
        (void) nfsrv_putattrbit(nd, &attrbits);
        if (syscred)
                nd->nd_flag |= ND_USEGSSNAME;
        error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
            NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
        if (error)
                return (error);
        NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
        if (!nd->nd_repstat) {
                /*
                 * The open stateid comes first.  rflags is the last word
                 * of the region dissected here; the five words between
                 * the stateid and rflags are not examined.
                 */
                NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
                    6 * NFSX_UNSIGNED);
                op->nfso_stateid.seqid = *tl++;
                op->nfso_stateid.other[0] = *tl++;
                op->nfso_stateid.other[1] = *tl++;
                op->nfso_stateid.other[2] = *tl;
                rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
                error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
                if (error)
                        goto nfsmout;
                /* Delegation type issued with this Open, if any. */
                NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
                deleg = fxdr_unsigned(u_int32_t, *tl);
                if (deleg == NFSV4OPEN_DELEGATEREAD ||
                    deleg == NFSV4OPEN_DELEGATEWRITE) {
                        /* Note the first delegation seen for this client. */
                        if (!(op->nfso_own->nfsow_clp->nfsc_flags &
                              NFSCLFLAGS_FIRSTDELEG))
                                op->nfso_own->nfsow_clp->nfsc_flags |=
                                  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
                        /* The file handle is stored right after the struct. */
                        ndp = malloc(
                            sizeof (struct nfscldeleg) + newfhlen,
                            M_NFSCLDELEG, M_WAITOK);
                        LIST_INIT(&ndp->nfsdl_owner);
                        LIST_INIT(&ndp->nfsdl_lock);
                        ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
                        ndp->nfsdl_fhlen = newfhlen;
                        NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
                        newnfs_copyincred(cred, &ndp->nfsdl_cred);
                        nfscl_lockinit(&ndp->nfsdl_rwlock);
                        /* Delegation stateid, then the recall flag. */
                        NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
                            NFSX_UNSIGNED);
                        ndp->nfsdl_stateid.seqid = *tl++;
                        ndp->nfsdl_stateid.other[0] = *tl++;
                        ndp->nfsdl_stateid.other[1] = *tl++;
                        ndp->nfsdl_stateid.other[2] = *tl++;
                        ret = fxdr_unsigned(int, *tl);
                        if (deleg == NFSV4OPEN_DELEGATEWRITE) {
                                ndp->nfsdl_flags = NFSCLDL_WRITE;
                                /*
                                 * Indicates how much the file can grow.
                                 */
                                NFSM_DISSECT(tl, u_int32_t *,
                                    3 * NFSX_UNSIGNED);
                                limitby = fxdr_unsigned(int, *tl++);
                                switch (limitby) {
                                case NFSV4OPEN_LIMITSIZE:
                                        ndp->nfsdl_sizelimit = fxdr_hyper(tl);
                                        break;
                                case NFSV4OPEN_LIMITBLOCKS:
                                        /* Product of the two 32-bit words. */
                                        ndp->nfsdl_sizelimit =
                                            fxdr_unsigned(u_int64_t, *tl++);
                                        ndp->nfsdl_sizelimit *=
                                            fxdr_unsigned(u_int64_t, *tl);
                                        break;
                                default:
                                        error = NFSERR_BADXDR;
                                        goto nfsmout;
                                }
                        } else {
                                ndp->nfsdl_flags = NFSCLDL_READ;
                        }
                        if (ret)
                                ndp->nfsdl_flags |= NFSCLDL_RECALL;
                        /* ret is reused here as an output of the ACE parse. */
                        error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
                            &acesize, p);
                        if (error)
                                goto nfsmout;
                } else if (deleg != NFSV4OPEN_DELEGATENONE) {
                        error = NFSERR_BADXDR;
                        goto nfsmout;
                }
                /* Step over two words, then load the Getattr results. */
                NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
                error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
                    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
                    NULL, NULL, NULL, p, cred);
                if (error)
                        goto nfsmout;
                if (ndp != NULL) {
                        ndp->nfsdl_change = nfsva.na_filerev;
                        ndp->nfsdl_modtime = nfsva.na_mtime;
                        ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
                }
                /* The server wants an OpenConfirm; retry it on DELAY. */
                if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
                    do {
                        ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
                            cred, p);
                        if (ret == NFSERR_DELAY)
                            (void) nfs_catnap(PZERO, ret, "nfs_open");
                    } while (ret == NFSERR_DELAY);
                    error = ret;
                }
                if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
                    nfscl_assumeposixlocks)
                    op->nfso_posixlock = 1;
                else
                    op->nfso_posixlock = 0;

                /*
                 * If the server is handing out delegations, but we didn't
                 * get one because an OpenConfirm was required, try the
                 * Open again, to get a delegation. This is a harmless no-op,
                 * from a server's point of view.
                 */
                if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
                    (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
                    && !error && dp == NULL && ndp == NULL && !recursed) {
                    do {
                        ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
                            newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
                            cred, p, syscred, 1);
                        if (ret == NFSERR_DELAY)
                            (void) nfs_catnap(PZERO, ret, "nfs_open2");
                    } while (ret == NFSERR_DELAY);
                    /*
                     * A failure of the second Open is only passed up for
                     * the stale clientid and bad session errors.
                     */
                    if (ret) {
                        if (ndp != NULL) {
                                free(ndp, M_NFSCLDELEG);
                                ndp = NULL;
                        }
                        if (ret == NFSERR_STALECLIENTID ||
                            ret == NFSERR_STALEDONTRECOVER ||
                            ret == NFSERR_BADSESSION)
                                error = ret;
                    }
                }
        }
        if (nd->nd_repstat != 0 && error == 0)
                error = nd->nd_repstat;
        if (error == NFSERR_STALECLIENTID)
                nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
nfsmout:
        /* Hand the new delegation to the caller only on success. */
        if (!error)
                *dpp = ndp;
        else if (ndp != NULL)
                free(ndp, M_NFSCLDELEG);
        m_freem(nd->nd_mrep);
        return (error);
}
684
685 /*
686  * open downgrade rpc
687  */
688 int
689 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
690     struct ucred *cred, NFSPROC_T *p)
691 {
692         u_int32_t *tl;
693         struct nfsrv_descript nfsd, *nd = &nfsd;
694         int error;
695
696         NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp);
697         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
698         if (NFSHASNFSV4N(VFSTONFS(vp->v_mount)))
699                 *tl++ = 0;
700         else
701                 *tl++ = op->nfso_stateid.seqid;
702         *tl++ = op->nfso_stateid.other[0];
703         *tl++ = op->nfso_stateid.other[1];
704         *tl++ = op->nfso_stateid.other[2];
705         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
706         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
707         *tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
708         error = nfscl_request(nd, vp, p, cred, NULL);
709         if (error)
710                 return (error);
711         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
712         if (!nd->nd_repstat) {
713                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
714                 op->nfso_stateid.seqid = *tl++;
715                 op->nfso_stateid.other[0] = *tl++;
716                 op->nfso_stateid.other[1] = *tl++;
717                 op->nfso_stateid.other[2] = *tl;
718         }
719         if (nd->nd_repstat && error == 0)
720                 error = nd->nd_repstat;
721         if (error == NFSERR_STALESTATEID)
722                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
723 nfsmout:
724         m_freem(nd->nd_mrep);
725         return (error);
726 }
727
728 /*
729  * V4 Close operation.
730  */
731 int
732 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
733 {
734         struct nfsclclient *clp;
735         int error;
736
737         if (vnode_vtype(vp) != VREG)
738                 return (0);
739         if (doclose)
740                 error = nfscl_doclose(vp, &clp, p);
741         else
742                 error = nfscl_getclose(vp, &clp);
743         if (error)
744                 return (error);
745
746         nfscl_clientrelease(clp);
747         return (0);
748 }
749
750 /*
751  * Close the open.
752  */
753 void
754 nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p)
755 {
756         struct nfsrv_descript nfsd, *nd = &nfsd;
757         struct nfscllockowner *lp, *nlp;
758         struct nfscllock *lop, *nlop;
759         struct ucred *tcred;
760         u_int64_t off = 0, len = 0;
761         u_int32_t type = NFSV4LOCKT_READ;
762         int error, do_unlock, trycnt;
763
764         tcred = newnfs_getcred();
765         newnfs_copycred(&op->nfso_cred, tcred);
766         /*
767          * (Theoretically this could be done in the same
768          *  compound as the close, but having multiple
769          *  sequenced Ops in the same compound might be
770          *  too scary for some servers.)
771          */
772         if (op->nfso_posixlock) {
773                 off = 0;
774                 len = NFS64BITSSET;
775                 type = NFSV4LOCKT_READ;
776         }
777
778         /*
779          * Since this function is only called from VOP_INACTIVE(), no
780          * other thread will be manipulating this Open. As such, the
781          * lock lists are not being changed by other threads, so it should
782          * be safe to do this without locking.
783          */
784         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
785                 do_unlock = 1;
786                 LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
787                         if (op->nfso_posixlock == 0) {
788                                 off = lop->nfslo_first;
789                                 len = lop->nfslo_end - lop->nfslo_first;
790                                 if (lop->nfslo_type == F_WRLCK)
791                                         type = NFSV4LOCKT_WRITE;
792                                 else
793                                         type = NFSV4LOCKT_READ;
794                         }
795                         if (do_unlock) {
796                                 trycnt = 0;
797                                 do {
798                                         error = nfsrpc_locku(nd, nmp, lp, off,
799                                             len, type, tcred, p, 0);
800                                         if ((nd->nd_repstat == NFSERR_GRACE ||
801                                             nd->nd_repstat == NFSERR_DELAY) &&
802                                             error == 0)
803                                                 (void) nfs_catnap(PZERO,
804                                                     (int)nd->nd_repstat,
805                                                     "nfs_close");
806                                 } while ((nd->nd_repstat == NFSERR_GRACE ||
807                                     nd->nd_repstat == NFSERR_DELAY) &&
808                                     error == 0 && trycnt++ < 5);
809                                 if (op->nfso_posixlock)
810                                         do_unlock = 0;
811                         }
812                         nfscl_freelock(lop, 0);
813                 }
814                 /*
815                  * Do a ReleaseLockOwner.
816                  * The lock owner name nfsl_owner may be used by other opens for
817                  * other files but the lock_owner4 name that nfsrpc_rellockown()
818                  * puts on the wire has the file handle for this file appended
819                  * to it, so it can be done now.
820                  */
821                 (void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
822                     lp->nfsl_open->nfso_fhlen, tcred, p);
823         }
824
825         /*
826          * There could be other Opens for different files on the same
827          * OpenOwner, so locking is required.
828          */
829         NFSLOCKCLSTATE();
830         nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
831         NFSUNLOCKCLSTATE();
832         do {
833                 error = nfscl_tryclose(op, tcred, nmp, p);
834                 if (error == NFSERR_GRACE)
835                         (void) nfs_catnap(PZERO, error, "nfs_close");
836         } while (error == NFSERR_GRACE);
837         NFSLOCKCLSTATE();
838         nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
839
840         LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
841                 nfscl_freelockowner(lp, 0);
842         nfscl_freeopen(op, 0);
843         NFSUNLOCKCLSTATE();
844         NFSFREECRED(tcred);
845 }
846
847 /*
848  * The actual Close RPC.
849  */
850 int
851 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
852     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
853     int syscred)
854 {
855         u_int32_t *tl;
856         int error;
857
858         nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
859             op->nfso_fhlen, NULL, NULL, 0, 0, false);
860         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
861         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
862         if (NFSHASNFSV4N(nmp))
863                 *tl++ = 0;
864         else
865                 *tl++ = op->nfso_stateid.seqid;
866         *tl++ = op->nfso_stateid.other[0];
867         *tl++ = op->nfso_stateid.other[1];
868         *tl = op->nfso_stateid.other[2];
869         if (syscred)
870                 nd->nd_flag |= ND_USEGSSNAME;
871         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
872             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
873         if (error)
874                 return (error);
875         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
876         if (nd->nd_repstat == 0)
877                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
878         error = nd->nd_repstat;
879         if (error == NFSERR_STALESTATEID)
880                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
881 nfsmout:
882         m_freem(nd->nd_mrep);
883         return (error);
884 }
885
886 /*
887  * V4 Open Confirm RPC.
888  */
889 int
890 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
891     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
892 {
893         u_int32_t *tl;
894         struct nfsrv_descript nfsd, *nd = &nfsd;
895         struct nfsmount *nmp;
896         int error;
897
898         nmp = VFSTONFS(vp->v_mount);
899         if (NFSHASNFSV4N(nmp))
900                 return (0);             /* No confirmation for NFSv4.1. */
901         nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL,
902             0, 0, false);
903         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
904         *tl++ = op->nfso_stateid.seqid;
905         *tl++ = op->nfso_stateid.other[0];
906         *tl++ = op->nfso_stateid.other[1];
907         *tl++ = op->nfso_stateid.other[2];
908         *tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
909         error = nfscl_request(nd, vp, p, cred, NULL);
910         if (error)
911                 return (error);
912         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
913         if (!nd->nd_repstat) {
914                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
915                 op->nfso_stateid.seqid = *tl++;
916                 op->nfso_stateid.other[0] = *tl++;
917                 op->nfso_stateid.other[1] = *tl++;
918                 op->nfso_stateid.other[2] = *tl;
919         }
920         error = nd->nd_repstat;
921         if (error == NFSERR_STALESTATEID)
922                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
923 nfsmout:
924         m_freem(nd->nd_mrep);
925         return (error);
926 }
927
928 /*
929  * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
930  * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
     *
     * Two paths exist, selected by NFSHASNFSV4N(nmp):
     * - true: when retokp is non-NULL and a session is already queued on
     *   nmp->nm_sess, a CreateSession with the extant ClientID is tried
     *   first.  If that is not possible or fails, an ExchangeID followed by
     *   a CreateSession is done.  On success the new session is inserted at
     *   the head of nmp->nm_sess and, when reclaim is 0, a ReclaimComplete
     *   is sent.
     * - false: a SetClientID, a SetClientIDConfirm and then a Getattr of the
     *   lease time are done; the lease is used to set clp->nfsc_renew and
     *   clp->nfsc_expire and clp->nfsc_clientidrev is advanced.
     *
     * retokp may be NULL.  When it is non-NULL, *retokp is set to true after
     * a successful CreateSession for the extant ClientID, and NFSERR_IO is
     * returned instead of attempting a new ClientID when *retokp is already
     * true.
     *
     * Returns 0 on success, otherwise an error from the RPC layer or the
     * status found in a reply.
931  */
932 int
933 nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
934     bool *retokp, struct ucred *cred, NFSPROC_T *p)
935 {
936         u_int32_t *tl;
937         struct nfsrv_descript nfsd;
938         struct nfsrv_descript *nd = &nfsd;
939         nfsattrbit_t attrbits;
940         u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
941         u_short port;
942         int error, isinet6 = 0, callblen;
943         nfsquad_t confirm;
944         u_int32_t lease;
945         static u_int32_t rev = 0;
946         struct nfsclds *dsp, *odsp;
947         struct in6_addr a6;
948         struct nfsclsession *tsep;
949
            /* Set nfsboottime once; it is sent in the SetClientID request below. */
950         if (nfsboottime.tv_sec == 0)
951                 NFSSETBOOTTIME(nfsboottime);
            /* Session based path; every route through this block returns. */
952         if (NFSHASNFSV4N(nmp)) {
                    /*
                     * Start with a non-zero error so that the ExchangeID path
                     * below runs unless the CreateSession for the extant
                     * ClientID succeeds.
                     */
953                 error = NFSERR_BADSESSION;
954                 odsp = dsp = NULL;
955                 if (retokp != NULL) {
956                         NFSLOCKMNT(nmp);
957                         odsp = TAILQ_FIRST(&nmp->nm_sess);
958                         NFSUNLOCKMNT(nmp);
959                 }
960                 if (odsp != NULL) {
961                         /*
962                          * When a session already exists, first try a
963                          * CreateSession with the extant ClientID.
964                          */
                            /*
                             * Build a fresh nfsclds that copies the clientid,
                             * sequenceid, flags and server owner string from
                             * the existing one.
                             */
965                         dsp = malloc(sizeof(struct nfsclds) +
966                             odsp->nfsclds_servownlen + 1, M_NFSCLDS,
967                             M_WAITOK | M_ZERO);
968                         dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
969                         dsp->nfsclds_servownlen = odsp->nfsclds_servownlen;
970                         dsp->nfsclds_sess.nfsess_clientid =
971                             odsp->nfsclds_sess.nfsess_clientid;
972                         dsp->nfsclds_sess.nfsess_sequenceid =
973                             odsp->nfsclds_sess.nfsess_sequenceid;
974                         dsp->nfsclds_flags = odsp->nfsclds_flags;
975                         if (dsp->nfsclds_servownlen > 0)
976                                 memcpy(dsp->nfsclds_serverown,
977                                     odsp->nfsclds_serverown,
978                                     dsp->nfsclds_servownlen + 1);
979                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
980                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
981                             NULL, MTX_DEF);
982                         nfscl_initsessionslots(&dsp->nfsclds_sess);
983                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
984                             &nmp->nm_sockreq, NULL,
985                             dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
986                         NFSCL_DEBUG(1, "create session for extant "
987                             "ClientID=%d\n", error);
988                         if (error != 0) {
989                                 nfscl_freenfsclds(dsp);
990                                 dsp = NULL;
991                                 /*
992                                  * If *retokp is true, return any error other
993                                  * than NFSERR_STALECLIENTID,
994                                  * NFSERR_BADSESSION or NFSERR_STALEDONTRECOVER
995                                  * so that nfscl_recover() will not loop.
996                                  */
997                                 if (*retokp)
998                                         return (NFSERR_IO);
999                         } else
1000                                 *retokp = true;
1001                 } else if (retokp != NULL && *retokp)
1002                         return (NFSERR_IO);
1003                 if (error != 0) {
1004                         /*
1005                          * Either there was no previous session or the
1006                          * CreateSession attempt failed, so...
1007                          * do an ExchangeID followed by the CreateSession.
1008                          */
1009                         clp->nfsc_rev = rev++;
1010                         error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, 0,
1011                             NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp,
1012                             cred, p);
1013                         NFSCL_DEBUG(1, "aft exch=%d\n", error);
1014                         if (error == 0)
1015                                 error = nfsrpc_createsession(nmp,
1016                                     &dsp->nfsclds_sess, &nmp->nm_sockreq, NULL,
1017                                     dsp->nfsclds_sess.nfsess_sequenceid, 1,
1018                                     cred, p);
1019                         NFSCL_DEBUG(1, "aft createsess=%d\n", error);
1020                 }
1021                 if (error == 0) {
1022                         NFSLOCKMNT(nmp);
1023                         /*
1024                          * The old sessions cannot be safely free'd
1025                          * here, since they may still be used by
1026                          * in-progress RPCs.
1027                          */
1028                         tsep = NULL;
1029                         if (TAILQ_FIRST(&nmp->nm_sess) != NULL)
1030                                 tsep = NFSMNT_MDSSESSION(nmp);
1031                         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
1032                             nfsclds_list);
1033                         /*
1034                          * Wake up RPCs waiting for a slot on the
1035                          * old session. These will then fail with
1036                          * NFSERR_BADSESSION and be retried with the
1037                          * new session by nfsv4_setsequence().
1038                          * Also wakeup() processes waiting for the
1039                          * new session.
1040                          */
1041                         if (tsep != NULL)
1042                                 wakeup(&tsep->nfsess_slots);
1043                         wakeup(&nmp->nm_sess);
1044                         NFSUNLOCKMNT(nmp);
1045                 } else if (dsp != NULL)
1046                         nfscl_freenfsclds(dsp);
                     /* ReclaimComplete is only sent when reclaim is 0. */
1047                 if (error == 0 && reclaim == 0) {
1048                         error = nfsrpc_reclaimcomplete(nmp, cred, p);
1049                         NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
1050                         if (error == NFSERR_COMPLETEALREADY ||
1051                             error == NFSERR_NOTSUPP)
1052                                 /* Ignore this error. */
1053                                 error = 0;
1054                 }
1055                 return (error);
1056         } else if (retokp != NULL && *retokp)
1057                 return (NFSERR_IO);
1058         clp->nfsc_rev = rev++;
1059
1060         /*
1061          * Allocate a single session structure for NFSv4.0, because some of
1062          * the fields are used by NFSv4.0 although it doesn't do a session.
1063          */
1064         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
1065         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
1066         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
1067         NFSLOCKMNT(nmp);
1068         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
1069         tsep = NFSMNT_MDSSESSION(nmp);
1070         NFSUNLOCKMNT(nmp);
1071
             /*
              * Build the SetClientID request: boot time and revision counter,
              * then the client id string.
              */
1072         nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0,
1073             false);
1074         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1075         *tl++ = txdr_unsigned(nfsboottime.tv_sec);
1076         *tl = txdr_unsigned(clp->nfsc_rev);
1077         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
1078
1079         /*
1080          * set up the callback address
1081          */
1082         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1083         *tl = txdr_unsigned(NFS_CALLBCKPROG);
             /* Without a configured callback address, look up our own IP. */
1084         callblen = strlen(nfsv4_callbackaddr);
1085         if (callblen == 0)
1086                 cp = nfscl_getmyip(nmp, &a6, &isinet6);
             /*
              * A real callback address is only sent when nfscl_enablecallb is
              * set, nfs_numnfscbd is positive and an address is available;
              * otherwise the "0.0.0.0.0.0" placeholder is sent.
              */
1087         if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
1088             (callblen > 0 || cp != NULL)) {
                     /*
                      * The port is appended to the address as two decimal
                      * bytes taken from its network byte order form.
                      */
1089                 port = htons(nfsv4_cbport);
1090                 cp2 = (u_int8_t *)&port;
1091 #ifdef INET6
1092                 if ((callblen > 0 &&
1093                      strchr(nfsv4_callbackaddr, ':')) || isinet6) {
1094                         char ip6buf[INET6_ADDRSTRLEN], *ip6add;
1095
1096                         (void) nfsm_strtom(nd, "tcp6", 4);
1097                         if (callblen == 0) {
1098                                 ip6_sprintf(ip6buf, (struct in6_addr *)cp);
1099                                 ip6add = ip6buf;
1100                         } else {
1101                                 ip6add = nfsv4_callbackaddr;
1102                         }
1103                         snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
1104                             ip6add, cp2[0], cp2[1]);
1105                 } else
1106 #endif
1107                 {
1108                         (void) nfsm_strtom(nd, "tcp", 3);
1109                         if (callblen == 0)
1110                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1111                                     "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
1112                                     cp[2], cp[3], cp2[0], cp2[1]);
1113                         else
1114                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1115                                     "%s.%d.%d", nfsv4_callbackaddr,
1116                                     cp2[0], cp2[1]);
1117                 }
1118                 (void) nfsm_strtom(nd, addr, strlen(addr));
1119         } else {
1120                 (void) nfsm_strtom(nd, "tcp", 3);
1121                 (void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
1122         }
1123         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1124         *tl = txdr_unsigned(clp->nfsc_cbident);
1125         nd->nd_flag |= ND_USEGSSNAME;
1126         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1127                 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1128         if (error)
1129                 return (error);
1130         if (nd->nd_repstat == 0) {
                 /*
                  * SetClientID succeeded: keep the two clientid words in the
                  * session structure and the two confirm words locally, then
                  * release the reply before building the next request.
                  */
1131             NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1132             tsep->nfsess_clientid.lval[0] = *tl++;
1133             tsep->nfsess_clientid.lval[1] = *tl++;
1134             confirm.lval[0] = *tl++;
1135             confirm.lval[1] = *tl;
1136             m_freem(nd->nd_mrep);
1137             nd->nd_mrep = NULL;
1138
1139             /*
1140              * and confirm it.
1141              */
1142             nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
1143                 NULL, 0, 0, false);
1144             NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1145             *tl++ = tsep->nfsess_clientid.lval[0];
1146             *tl++ = tsep->nfsess_clientid.lval[1];
1147             *tl++ = confirm.lval[0];
1148             *tl = confirm.lval[1];
1149             nd->nd_flag |= ND_USEGSSNAME;
1150             error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1151                 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1152             if (error)
1153                 return (error);
1154             m_freem(nd->nd_mrep);
1155             nd->nd_mrep = NULL;
1156             if (nd->nd_repstat == 0) {
                     /* Confirmed: fetch the lease time with a Getattr. */
1157                 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh,
1158                     nmp->nm_fhsize, NULL, NULL, 0, 0, false);
1159                 NFSZERO_ATTRBIT(&attrbits);
1160                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1161                 (void) nfsrv_putattrbit(nd, &attrbits);
1162                 nd->nd_flag |= ND_USEGSSNAME;
1163                 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1164                     cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1165                 if (error)
1166                     return (error);
1167                 if (nd->nd_repstat == 0) {
1168                     error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL,
1169                         NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred);
1170                     if (error)
1171                         goto nfsmout;
                         /*
                          * Derive the renew interval and expiry from the
                          * lease, then advance the clientid revision,
                          * skipping the value 0.
                          */
1172                     clp->nfsc_renew = NFSCL_RENEW(lease);
1173                     clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1174                     clp->nfsc_clientidrev++;
1175                     if (clp->nfsc_clientidrev == 0)
1176                         clp->nfsc_clientidrev++;
1177                 }
1178             }
1179         }
             /* Report the status of whichever reply was looked at last. */
1180         error = nd->nd_repstat;
1181 nfsmout:
1182         m_freem(nd->nd_mrep);
1183         return (error);
1184 }
1185
1186 /*
1187  * nfs getattr call.
1188  */
1189 int
1190 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1191     struct nfsvattr *nap, void *stuff)
1192 {
1193         struct nfsrv_descript nfsd, *nd = &nfsd;
1194         int error;
1195         nfsattrbit_t attrbits;
1196         
1197         NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
1198         if (nd->nd_flag & ND_NFSV4) {
1199                 NFSGETATTR_ATTRBIT(&attrbits);
1200                 (void) nfsrv_putattrbit(nd, &attrbits);
1201         }
1202         error = nfscl_request(nd, vp, p, cred, stuff);
1203         if (error)
1204                 return (error);
1205         if (!nd->nd_repstat)
1206                 error = nfsm_loadattr(nd, nap);
1207         else
1208                 error = nd->nd_repstat;
1209         m_freem(nd->nd_mrep);
1210         return (error);
1211 }
1212
1213 /*
1214  * nfs getattr call with non-vnode arguemnts.
1215  */
1216 int
1217 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1218     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1219     uint32_t *leasep)
1220 {
1221         struct nfsrv_descript nfsd, *nd = &nfsd;
1222         int error, vers = NFS_VER2;
1223         nfsattrbit_t attrbits;
1224         
1225         nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0,
1226             false);
1227         if (nd->nd_flag & ND_NFSV4) {
1228                 vers = NFS_VER4;
1229                 NFSGETATTR_ATTRBIT(&attrbits);
1230                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1231                 (void) nfsrv_putattrbit(nd, &attrbits);
1232         } else if (nd->nd_flag & ND_NFSV3) {
1233                 vers = NFS_VER3;
1234         }
1235         if (syscred)
1236                 nd->nd_flag |= ND_USEGSSNAME;
1237         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1238             NFS_PROG, vers, NULL, 1, xidp, NULL);
1239         if (error)
1240                 return (error);
1241         if (nd->nd_repstat == 0) {
1242                 if ((nd->nd_flag & ND_NFSV4) != 0)
1243                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1244                             NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1245                             NULL, NULL);
1246                 else
1247                         error = nfsm_loadattr(nd, nap);
1248         } else
1249                 error = nd->nd_repstat;
1250         m_freem(nd->nd_mrep);
1251         return (error);
1252 }
1253
1254 /*
1255  * Do an nfs setattr operation.
1256  */
1257 int
1258 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1259     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
1260     void *stuff)
1261 {
1262         int error, expireret = 0, openerr, retrycnt;
1263         u_int32_t clidrev = 0, mode;
1264         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1265         struct nfsfh *nfhp;
1266         nfsv4stateid_t stateid;
1267         void *lckp;
1268
1269         if (nmp->nm_clp != NULL)
1270                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1271         if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1272                 mode = NFSV4OPEN_ACCESSWRITE;
1273         else
1274                 mode = NFSV4OPEN_ACCESSREAD;
1275         retrycnt = 0;
1276         do {
1277                 lckp = NULL;
1278                 openerr = 1;
1279                 if (NFSHASNFSV4(nmp)) {
1280                         nfhp = VTONFS(vp)->n_fhp;
1281                         error = nfscl_getstateid(vp, nfhp->nfh_fh,
1282                             nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1283                         if (error && vnode_vtype(vp) == VREG &&
1284                             (mode == NFSV4OPEN_ACCESSWRITE ||
1285                              nfstest_openallsetattr)) {
1286                                 /*
1287                                  * No Open stateid, so try and open the file
1288                                  * now.
1289                                  */
1290                                 if (mode == NFSV4OPEN_ACCESSWRITE)
1291                                         openerr = nfsrpc_open(vp, FWRITE, cred,
1292                                             p);
1293                                 else
1294                                         openerr = nfsrpc_open(vp, FREAD, cred,
1295                                             p);
1296                                 if (!openerr)
1297                                         (void) nfscl_getstateid(vp,
1298                                             nfhp->nfh_fh, nfhp->nfh_len,
1299                                             mode, 0, cred, p, &stateid, &lckp);
1300                         }
1301                 }
1302                 if (vap != NULL)
1303                         error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1304                             rnap, attrflagp, stuff);
1305                 else
1306                         error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
1307                             stuff);
1308                 if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1309                         NFSLOCKMNT(nmp);
1310                         nmp->nm_state |= NFSSTA_OPENMODE;
1311                         NFSUNLOCKMNT(nmp);
1312                 }
1313                 if (error == NFSERR_STALESTATEID)
1314                         nfscl_initiate_recovery(nmp->nm_clp);
1315                 if (lckp != NULL)
1316                         nfscl_lockderef(lckp);
1317                 if (!openerr)
1318                         (void) nfsrpc_close(vp, 0, p);
1319                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1320                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1321                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1322                         (void) nfs_catnap(PZERO, error, "nfs_setattr");
1323                 } else if ((error == NFSERR_EXPIRED ||
1324                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1325                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1326                 }
1327                 retrycnt++;
1328         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1329             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1330             error == NFSERR_BADSESSION ||
1331             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1332             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1333              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1334             (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1335              retrycnt < 4));
1336         if (error && retrycnt >= 4)
1337                 error = EIO;
1338         return (error);
1339 }
1340
     /*
      * Do a single Setattr RPC on vp using the attributes in vap and parse
      * the reply.
      * For NFSv4 the stateid is put first and a Getattr op is appended to the
      * request, so that attributes come back in the same reply.
      * *attrflagp is cleared on entry; rnap and attrflagp are handed to the
      * reply parsing helpers (nfscl_wcc_data() and nfscl_postop_attr()).
      * Returns the request/parse error if there is one, otherwise nd_repstat.
      * nb: vap->va_type is overwritten with the vnode's type.
      */
1341 static int
1342 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1343     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1344     struct nfsvattr *rnap, int *attrflagp, void *stuff)
1345 {
1346         u_int32_t *tl;
1347         struct nfsrv_descript nfsd, *nd = &nfsd;
1348         int error;
1349         nfsattrbit_t attrbits;
1350
1351         *attrflagp = 0;
1352         NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp);
1353         if (nd->nd_flag & ND_NFSV4)
1354                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1355         vap->va_type = vnode_vtype(vp);
1356         nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1357         if (nd->nd_flag & ND_NFSV3) {
                     /*
                      * One extra boolean word, always false.
                      * NOTE(review): presumably the NFSv3 sattrguard "check"
                      * flag - confirm against RFC 1813.
                      */
1358                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1359                 *tl = newnfs_false;
1360         } else if (nd->nd_flag & ND_NFSV4) {
                     /* Append a Getattr op with the NFSGETATTR_ATTRBIT() bits. */
1361                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1362                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1363                 NFSGETATTR_ATTRBIT(&attrbits);
1364                 (void) nfsrv_putattrbit(nd, &attrbits);
1365         }
1366         error = nfscl_request(nd, vp, p, cred, stuff);
1367         if (error)
1368                 return (error);
1369         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1370                 error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff);
             /*
              * NFSv4 with more reply data: parse an attribute bitmap from the
              * reply. The bits land in attrbits, which is not read afterwards.
              */
1371         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1372                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
             /* Not NFSv3 and no error status in the reply: get the attributes. */
1373         if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1374                 error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
1375         m_freem(nd->nd_mrep);
             /* A local request/parse error takes precedence over nd_repstat. */
1376         if (nd->nd_repstat && !error)
1377                 error = nd->nd_repstat;
1378         return (error);
1379 }
1380
1381 /*
1382  * nfs lookup rpc
      * Look up name (len bytes) in directory dvp.
      * Fails early with ENOTDIR when dvp is not a VDIR and with ENAMETOOLONG
      * when len exceeds NFS_MAXNAMLEN.
      * For NFSv4 mounts "." is answered locally with a malloc'd copy of dvp's
      * file handle and ".." is done with a Lookupp RPC; everything else uses
      * a Lookup RPC.
      * On success the file handle is returned via *nfhpp. *attrflagp and
      * *dattrflagp are cleared on entry; *dattrflagp is set to 1 here when
      * the NFSv4 directory attributes are loaded into dnap.
      * Returns 0, a local errno, or the status from the server's reply.
1383  */
1384 int
1385 nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
1386     NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
1387     struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff)
1388 {
1389         u_int32_t *tl;
1390         struct nfsrv_descript nfsd, *nd = &nfsd;
1391         struct nfsmount *nmp;
1392         struct nfsnode *np;
1393         struct nfsfh *nfhp;
1394         nfsattrbit_t attrbits;
1395         int error = 0, lookupp = 0;
1396
1397         *attrflagp = 0;
1398         *dattrflagp = 0;
1399         if (vnode_vtype(dvp) != VDIR)
1400                 return (ENOTDIR);
1401         nmp = VFSTONFS(dvp->v_mount);
1402         if (len > NFS_MAXNAMLEN)
1403                 return (ENAMETOOLONG);
1404         if (NFSHASNFSV4(nmp) && len == 1 &&
1405                 name[0] == '.') {
1406                 /*
1407                  * Just return the current dir's fh.
                      * No RPC is done, so neither attribute flag gets set.
1408                  */
1409                 np = VTONFS(dvp);
1410                 nfhp = malloc(sizeof (struct nfsfh) +
1411                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1412                 nfhp->nfh_len = np->n_fhp->nfh_len;
1413                 NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1414                 *nfhpp = nfhp;
1415                 return (0);
1416         }
1417         if (NFSHASNFSV4(nmp) && len == 2 &&
1418                 name[0] == '.' && name[1] == '.') {
                     /* NFSv4 "..": Lookupp takes no name argument. */
1419                 lookupp = 1;
1420                 NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp);
1421         } else {
1422                 NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp);
1423                 (void) nfsm_strtom(nd, name, len);
1424         }
1425         if (nd->nd_flag & ND_NFSV4) {
                     /* Append GetFH and Getattr ops for the looked up object. */
1426                 NFSGETATTR_ATTRBIT(&attrbits);
1427                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1428                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
1429                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1430                 (void) nfsrv_putattrbit(nd, &attrbits);
1431         }
1432         error = nfscl_request(nd, dvp, p, cred, stuff);
1433         if (error)
1434                 return (error);
1435         if (nd->nd_repstat) {
1436                 /*
1437                  * When an NFSv4 Lookupp returns ENOENT, it means that
1438                  * the lookup is at the root of an fs, so return this dir.
1439                  */
1440                 if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
1441                     np = VTONFS(dvp);
1442                     nfhp = malloc(sizeof (struct nfsfh) +
1443                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1444                     nfhp->nfh_len = np->n_fhp->nfh_len;
1445                     NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1446                     *nfhpp = nfhp;
1447                     m_freem(nd->nd_mrep);
1448                     return (0);
1449                 }
                     /*
                      * The lookup failed, but the reply may still carry the
                      * directory's attributes, so try to get those before
                      * returning the failure.
                      */
1450                 if (nd->nd_flag & ND_NFSV3)
1451                     error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1452                 else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1453                     ND_NFSV4) {
1454                         /* Load the directory attributes. */
1455                         error = nfsm_loadattr(nd, dnap);
1456                         if (error == 0)
1457                                 *dattrflagp = 1;
1458                 }
1459                 goto nfsmout;
1460         }
1461         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
1462                 /* Load the directory attributes. */
1463                 error = nfsm_loadattr(nd, dnap);
1464                 if (error != 0)
1465                         goto nfsmout;
1466                 *dattrflagp = 1;
1467                 /* Skip over the Lookup and GetFH operation status values. */
1468                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1469         }
1470         error = nfsm_getfh(nd, nfhpp);
1471         if (error)
1472                 goto nfsmout;
1473
             /* Attributes of the object found, then (NFSv3) of the directory. */
1474         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1475         if ((nd->nd_flag & ND_NFSV3) && !error)
1476                 error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1477 nfsmout:
1478         m_freem(nd->nd_mrep);
             /* A local parse error takes precedence over the reply status. */
1479         if (!error && nd->nd_repstat)
1480                 error = nd->nd_repstat;
1481         return (error);
1482 }
1483
1484 /*
1485  * Do a readlink rpc.
      * The link data from the reply is copied into uiop.
      * For NFSv4 a Getattr op is appended to the request. *attrflagp is
      * cleared on entry and nap/attrflagp are passed to nfscl_postop_attr()
      * to receive the attributes from the reply.
      * Returns the request/parse error if there is one, otherwise nd_repstat.
1486  */
1487 int
1488 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1489     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1490 {
1491         u_int32_t *tl;
1492         struct nfsrv_descript nfsd, *nd = &nfsd;
1493         struct nfsnode *np = VTONFS(vp);
1494         nfsattrbit_t attrbits;
1495         int error, len, cangetattr = 1;
1496
1497         *attrflagp = 0;
1498         NFSCL_REQSTART(nd, NFSPROC_READLINK, vp);
1499         if (nd->nd_flag & ND_NFSV4) {
1500                 /*
1501                  * And do a Getattr op.
1502                  */
1503                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1504                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1505                 NFSGETATTR_ATTRBIT(&attrbits);
1506                 (void) nfsrv_putattrbit(nd, &attrbits);
1507         }
1508         error = nfscl_request(nd, vp, p, cred, stuff);
1509         if (error)
1510                 return (error);
             /* For NFSv3 the attributes are parsed before the link data. */
1511         if (nd->nd_flag & ND_NFSV3)
1512                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1513         if (!nd->nd_repstat && !error) {
                     /*
                      * NOTE(review): NFSM_STRSIZ() is defined elsewhere; it
                      * presumably sets len and jumps to nfsmout on a bad
                      * length, which is why that label exists - confirm.
                      */
1514                 NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1515                 /*
1516                  * This seems weird to me, but must have been added to
1517                  * FreeBSD for some reason. The only thing I can think of
1518                  * is that there was/is some server that replies with
1519                  * more link data than it should?
                      * When the shorter cached n_size is used instead, the
                      * NFSv4 attributes below are not parsed (cangetattr = 0).
1520                  */
1521                 if (len == NFS_MAXPATHLEN) {
1522                         NFSLOCKNODE(np);
1523                         if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1524                                 len = np->n_size;
1525                                 cangetattr = 0;
1526                         }
1527                         NFSUNLOCKNODE(np);
1528                 }
1529                 error = nfsm_mbufuio(nd, uiop, len);
                     /* For NFSv4 the attributes follow the link data. */
1530                 if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1531                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1532         }
1533         if (nd->nd_repstat && !error)
1534                 error = nd->nd_repstat;
1535 nfsmout:
1536         m_freem(nd->nd_mrep);
1537         return (error);
1538 }
1539
1540 /*
1541  * Read operation.
      * Wrapper around nfsrpc_readrpc() that, for NFSv4 mounts, gets a stateid
      * for the file and retries the read for the recoverable errors listed in
      * the loop condition below.
      * GRACE, STALESTATEID, STALEDONTRECOVER, DELAY and BADSESSION are retried
      * without a retry limit, OLDSTATEID while retrycnt < 20 and
      * EXPIRED/BADSTATEID/OPENMODE while retrycnt < 4.
      * Any error left once retrycnt has reached 4 is returned as EIO.
1542  */
1543 int
1544 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1545     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1546 {
1547         int error, expireret = 0, retrycnt;
1548         u_int32_t clidrev = 0;
1549         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1550         struct nfsnode *np = VTONFS(vp);
1551         struct ucred *newcred;
1552         struct nfsfh *nfhp = NULL;
1553         nfsv4stateid_t stateid;
1554         void *lckp;
1555
1556         if (nmp->nm_clp != NULL)
1557                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1558         newcred = cred;
             /* Only NFSv4 gets its own cred; it is released before returning. */
1559         if (NFSHASNFSV4(nmp)) {
1560                 nfhp = np->n_fhp;
1561                 newcred = NFSNEWCRED(cred);
1562         }
1563         retrycnt = 0;
1564         do {
1565                 lckp = NULL;
                     /*
                      * stateid is only filled in for NFSv4; nfsrpc_readrpc()
                      * only puts it in the request when ND_NFSV4 is set.
                      */
1566                 if (NFSHASNFSV4(nmp))
1567                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1568                             NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1569                             &lckp);
1570                 error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1571                     attrflagp, stuff);
                     /* Record on the mount that a read got NFSERR_OPENMODE. */
1572                 if (error == NFSERR_OPENMODE) {
1573                         NFSLOCKMNT(nmp);
1574                         nmp->nm_state |= NFSSTA_OPENMODE;
1575                         NFSUNLOCKMNT(nmp);
1576                 }
1577                 if (error == NFSERR_STALESTATEID)
1578                         nfscl_initiate_recovery(nmp->nm_clp);
1579                 if (lckp != NULL)
1580                         nfscl_lockderef(lckp);
                     /* Nap before retrying, or check for client expiry. */
1581                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1582                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1583                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1584                         (void) nfs_catnap(PZERO, error, "nfs_read");
1585                 } else if ((error == NFSERR_EXPIRED ||
1586                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1587                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1588                 }
1589                 retrycnt++;
1590         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1591             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1592             error == NFSERR_BADSESSION ||
1593             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1594             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1595              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1596             (error == NFSERR_OPENMODE && retrycnt < 4));
             /* Out of retries: whatever error is left is returned as EIO. */
1597         if (error && retrycnt >= 4)
1598                 error = EIO;
1599         if (NFSHASNFSV4(nmp))
1600                 NFSFREECRED(newcred);
1601         return (error);
1602 }
1603
1604 /*
1605  * The actual read RPC.
      * Reads uiop->uio_resid bytes starting at uiop->uio_offset, doing one
      * Read RPC per chunk of at most nm_rsize bytes and copying the reply
      * data into uiop.
      * Returns EFBIG without doing any RPC when offset + resid is beyond
      * nm_maxfilesize or wraps around.
      * The loop stops early on eof or a zero length reply (NFSv3/4), or on a
      * short reply (NFSv2).
      * *attrflagp is cleared before each RPC; for NFSv3 nap/attrflagp are
      * passed to nfscl_postop_attr() and for NFSv2 *attrflagp is set to 1
      * when nap is loaded. No attributes are parsed here for NFSv4.
1606  */
1607 static int
1608 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1609     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1610     int *attrflagp, void *stuff)
1611 {
1612         u_int32_t *tl;
1613         int error = 0, len, retlen, tsiz, eof = 0;
1614         struct nfsrv_descript nfsd;
1615         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1616         struct nfsrv_descript *nd = &nfsd;
1617         int rsize;
1618         off_t tmp_off;
1619
1620         *attrflagp = 0;
1621         tsiz = uiop->uio_resid;
1622         tmp_off = uiop->uio_offset + tsiz;
1623         NFSLOCKMNT(nmp);
             /* The second test catches the end offset wrapping around. */
1624         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1625                 NFSUNLOCKMNT(nmp);
1626                 return (EFBIG);
1627         }
1628         rsize = nmp->nm_rsize;
1629         NFSUNLOCKMNT(nmp);
             /* So that nfsmout only frees a reply that is still held. */
1630         nd->nd_mrep = NULL;
1631         while (tsiz > 0) {
1632                 *attrflagp = 0;
1633                 len = (tsiz > rsize) ? rsize : tsiz;
1634                 NFSCL_REQSTART(nd, NFSPROC_READ, vp);
1635                 if (nd->nd_flag & ND_NFSV4)
1636                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
                     /*
                      * Three words in both cases: NFSv2 gets a 32bit offset,
                      * the length and a zero word; NFSv3/4 get a 64bit offset
                      * followed by the length.
                      */
1637                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1638                 if (nd->nd_flag & ND_NFSV2) {
1639                         *tl++ = txdr_unsigned(uiop->uio_offset);
1640                         *tl++ = txdr_unsigned(len);
1641                         *tl = 0;
1642                 } else {
1643                         txdr_hyper(uiop->uio_offset, tl);
1644                         *(tl + 2) = txdr_unsigned(len);
1645                 }
1646                 /*
1647                  * Since I can't do a Getattr for NFSv4 for Write, there
1648                  * doesn't seem any point in doing one here, either.
1649                  * (See the comment in nfsrpc_writerpc() for more info.)
1650                  */
1651                 error = nfscl_request(nd, vp, p, cred, stuff);
1652                 if (error)
1653                         return (error);
1654                 if (nd->nd_flag & ND_NFSV3) {
1655                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1656                 } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1657                         error = nfsm_loadattr(nd, nap);
1658                         if (!error)
1659                                 *attrflagp = 1;
1660                 }
1661                 if (nd->nd_repstat || error) {
1662                         if (!error)
1663                                 error = nd->nd_repstat;
1664                         goto nfsmout;
1665                 }
                     /*
                      * NFSv3 has two words here and eof is the second one;
                      * NFSv4 has eof only. eof stays 0 for NFSv2.
                      */
1666                 if (nd->nd_flag & ND_NFSV3) {
1667                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1668                         eof = fxdr_unsigned(int, *(tl + 1));
1669                 } else if (nd->nd_flag & ND_NFSV4) {
1670                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1671                         eof = fxdr_unsigned(int, *tl);
1672                 }
1673                 NFSM_STRSIZ(retlen, len);
1674                 error = nfsm_mbufuio(nd, uiop, retlen);
1675                 if (error)
1676                         goto nfsmout;
1677                 m_freem(nd->nd_mrep);
1678                 nd->nd_mrep = NULL;
1679                 tsiz -= retlen;
1680                 if (!(nd->nd_flag & ND_NFSV2)) {
1681                         if (eof || retlen == 0)
1682                                 tsiz = 0;
1683                 } else if (retlen < len)
1684                         tsiz = 0;
1685         }
1686         return (0);
1687 nfsmout:
1688         if (nd->nd_mrep != NULL)
1689                 m_freem(nd->nd_mrep);
1690         return (error);
1691 }
1692
1693 /*
1694  * nfs write operation
1695  * When called_from_strategy != 0, it should return EIO for an error that
1696  * indicates recovery is in progress, so that the buffer will be left
1697  * dirty and be written back to the server later. If it loops around,
1698  * the recovery thread could get stuck waiting for the buffer and recovery
1699  * will then deadlock.
      * Wrapper around nfsrpc_writerpc() that, for NFSv4 mounts, gets a
      * stateid for the file and retries the write for the recoverable errors
      * listed in the loop condition below.
      * *must_commit is cleared on entry. If the NFSv4 stateid comes back all
      * zeros, no RPC is done and 0 is returned.
      * Any error left once retrycnt has reached 4 is returned as EIO.
1700  */
1701 int
1702 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1703     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1704     void *stuff, int called_from_strategy)
1705 {
1706         int error, expireret = 0, retrycnt, nostateid;
1707         u_int32_t clidrev = 0;
1708         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1709         struct nfsnode *np = VTONFS(vp);
1710         struct ucred *newcred;
1711         struct nfsfh *nfhp = NULL;
1712         nfsv4stateid_t stateid;
1713         void *lckp;
1714
1715         *must_commit = 0;
1716         if (nmp->nm_clp != NULL)
1717                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1718         newcred = cred;
             /* Only NFSv4 gets its own cred; it is released before returning. */
1719         if (NFSHASNFSV4(nmp)) {
1720                 newcred = NFSNEWCRED(cred);
1721                 nfhp = np->n_fhp;
1722         }
1723         retrycnt = 0;
1724         do {
1725                 lckp = NULL;
1726                 nostateid = 0;
1727                 if (NFSHASNFSV4(nmp)) {
1728                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1729                             NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1730                             &lckp);
                             /* All three "other" words zero: no stateid. */
1731                         if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1732                             stateid.other[2] == 0) {
1733                                 nostateid = 1;
1734                                 NFSCL_DEBUG(1, "stateid0 in write\n");
1735                         }
1736                 }
1737
1738                 /*
1739                  * If there is no stateid for NFSv4, it means this is an
1740                  * extraneous write after close. Basically a poorly
1741                  * implemented buffer cache. Just don't do the write.
1742                  */
1743                 if (nostateid)
1744                         error = 0;
1745                 else
1746                         error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1747                             newcred, &stateid, p, nap, attrflagp, stuff);
1748                 if (error == NFSERR_STALESTATEID)
1749                         nfscl_initiate_recovery(nmp->nm_clp);
1750                 if (lckp != NULL)
1751                         nfscl_lockderef(lckp);
                     /* Nap before retrying, or check for client expiry. */
1752                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1753                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1754                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1755                         (void) nfs_catnap(PZERO, error, "nfs_write");
1756                 } else if ((error == NFSERR_EXPIRED ||
1757                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1758                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1759                 }
1760                 retrycnt++;
             /*
              * Unlike the read case, STALESTATEID, BADSESSION and
              * STALEDONTRECOVER are only retried here when
              * called_from_strategy == 0 (see the comment above the function).
              */
1761         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
1762             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1763               error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
1764             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1765             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1766              expireret == 0 && clidrev != 0 && retrycnt < 4));
             /*
              * EIO when out of retries, and for the recovery related errors
              * when called from the strategy routine.
              */
1767         if (error != 0 && (retrycnt >= 4 ||
1768             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1769               error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
1770                 error = EIO;
1771         if (NFSHASNFSV4(nmp))
1772                 NFSFREECRED(newcred);
1773         return (error);
1774 }
1775
1776 /*
1777  * The actual write RPC.
      * Writes uiop->uio_resid bytes starting at uiop->uio_offset, doing one
      * Write RPC per chunk of at most nm_wsize bytes. The uio must have
      * exactly one iovec (KASSERT below).
      * Returns EFBIG without doing any RPC when offset + resid is beyond
      * nm_maxfilesize or wraps around.
      * *iomode is the stable-how value put in each NFSv3/4 request; on every
      * exit through nfsmout it is replaced by the lowest commitment level
      * found in the replies. It is left untouched on the early returns.
      * *must_commit is set to 1 when a reply's write verifier differs from
      * the one saved in the mount structure.
      * When a reply has an error status, the uio fields advanced by
      * nfsm_uiombuf() are rolled back so the caller can retry.
1778  */
1779 static int
1780 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
1781     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
1782     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1783 {
1784         u_int32_t *tl;
1785         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1786         struct nfsnode *np = VTONFS(vp);
1787         int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC;
1788         int wccflag = 0, wsize;
1789         int32_t backup;
1790         struct nfsrv_descript nfsd;
1791         struct nfsrv_descript *nd = &nfsd;
1792         nfsattrbit_t attrbits;
1793         off_t tmp_off;
1794
1795         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
1796         *attrflagp = 0;
1797         tsiz = uiop->uio_resid;
1798         tmp_off = uiop->uio_offset + tsiz;
1799         NFSLOCKMNT(nmp);
             /* The second test catches the end offset wrapping around. */
1800         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1801                 NFSUNLOCKMNT(nmp);
1802                 return (EFBIG);
1803         }
1804         wsize = nmp->nm_wsize;
1805         NFSUNLOCKMNT(nmp);
1806         nd->nd_mrep = NULL;     /* NFSv2 sometimes does a write with */
1807         nd->nd_repstat = 0;     /* uio_resid == 0, so the while is not done */
1808         while (tsiz > 0) {
1809                 *attrflagp = 0;
1810                 len = (tsiz > wsize) ? wsize : tsiz;
1811                 NFSCL_REQSTART(nd, NFSPROC_WRITE, vp);
1812                 if (nd->nd_flag & ND_NFSV4) {
                             /* stateid, 64bit offset, stable-how, length */
1813                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1814                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
1815                         txdr_hyper(uiop->uio_offset, tl);
1816                         tl += 2;
1817                         *tl++ = txdr_unsigned(*iomode);
1818                         *tl = txdr_unsigned(len);
1819                 } else if (nd->nd_flag & ND_NFSV3) {
                             /* 64bit offset, length, stable-how, length again */
1820                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
1821                         txdr_hyper(uiop->uio_offset, tl);
1822                         tl += 2;
1823                         *tl++ = txdr_unsigned(len);
1824                         *tl++ = txdr_unsigned(*iomode);
1825                         *tl = txdr_unsigned(len);
1826                 } else {
1827                         u_int32_t x;
1828
1829                         NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1830                         /*
1831                          * Not sure why someone changed this, since the
1832                          * RFC clearly states that "beginoffset" and
1833                          * "totalcount" are ignored, but it wouldn't
1834                          * surprise me if there's a busted server out there.
1835                          */
1836                         /* Set both "begin" and "current" to non-garbage. */
1837                         x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1838                         *tl++ = x;      /* "begin offset" */
1839                         *tl++ = x;      /* "current offset" */
1840                         x = txdr_unsigned(len);
1841                         *tl++ = x;      /* total to this offset */
1842                         *tl = x;        /* size of this write */
1843
1844                 }
                     /*
                      * Put len bytes from the uio in the request. This is what
                      * advances the uio fields that get rolled back below.
                      */
1845                 nfsm_uiombuf(nd, uiop, len);
1846                 /*
1847                  * Although it is tempting to do a normal Getattr Op in the
1848                  * NFSv4 compound, the result can be a nearly hung client
1849                  * system if the Getattr asks for Owner and/or OwnerGroup.
1850                  * It occurs when the client can't map either the Owner or
1851                  * Owner_group name in the Getattr reply to a uid/gid. When
1852                  * there is a cache miss, the kernel does an upcall to the
1853                  * nfsuserd. Then, it can try and read the local /etc/passwd
1854                  * or /etc/group file. It can then block in getnewbuf(),
1855                  * waiting for dirty writes to be pushed to the NFS server.
1856                  * The only reason this doesn't result in a complete
1857                  * deadlock, is that the upcall times out and allows
1858                  * the write to complete. However, progress is so slow
1859                  * that it might just as well be deadlocked.
1860                  * As such, we get the rest of the attributes, but not
1861                  * Owner or Owner_group.
1862                  * nb: nfscl_loadattrcache() needs to be told that these
1863                  *     partial attributes from a write rpc are being
1864                  *     passed in, via a argument flag.
1865                  */
1866                 if (nd->nd_flag & ND_NFSV4) {
1867                         NFSWRITEGETATTR_ATTRBIT(&attrbits);
1868                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1869                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
1870                         (void) nfsrv_putattrbit(nd, &attrbits);
1871                 }
1872                 error = nfscl_request(nd, vp, p, cred, stuff);
1873                 if (error)
1874                         return (error);
1875                 if (nd->nd_repstat) {
1876                         /*
1877                          * In case the rpc gets retried, roll
1878                          * the uio fields changed by nfsm_uiombuf()
1879                          * back.
1880                          */
1881                         uiop->uio_offset -= len;
1882                         uiop->uio_resid += len;
1883                         uiop->uio_iov->iov_base =
1884                             (char *)uiop->uio_iov->iov_base - len;
1885                         uiop->uio_iov->iov_len += len;
1886                 }
1887                 if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1888                         error = nfscl_wcc_data(nd, vp, nap, attrflagp,
1889                             &wccflag, stuff);
1890                         if (error)
1891                                 goto nfsmout;
1892                 }
1893                 if (!nd->nd_repstat) {
1894                         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
                                     /* byte count, commitment level, verifier */
1895                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
1896                                         + NFSX_VERF);
1897                                 rlen = fxdr_unsigned(int, *tl++);
1898                                 if (rlen == 0) {
1899                                         error = NFSERR_IO;
1900                                         goto nfsmout;
1901                                 } else if (rlen < len) {
                                             /*
                                              * Short write: back the uio up
                                              * by the part that was not
                                              * written, so the next RPC
                                              * resends it.
                                              */
1902                                         backup = len - rlen;
1903                                         uiop->uio_iov->iov_base =
1904                                             (char *)uiop->uio_iov->iov_base -
1905                                             backup;
1906                                         uiop->uio_iov->iov_len += backup;
1907                                         uiop->uio_offset -= backup;
1908                                         uiop->uio_resid += backup;
1909                                         len = rlen;
1910                                 }
1911                                 commit = fxdr_unsigned(int, *tl++);
1912
1913                                 /*
1914                                  * Return the lowest commitment level
1915                                  * obtained by any of the RPCs.
1916                                  */
1917                                 if (committed == NFSWRITE_FILESYNC)
1918                                         committed = commit;
1919                                 else if (committed == NFSWRITE_DATASYNC &&
1920                                         commit == NFSWRITE_UNSTABLE)
1921                                         committed = commit;
                                     /*
                                      * Save the first write verifier seen.
                                      * After that, a different verifier means
                                      * the caller must commit; the new one is
                                      * saved.
                                      */
1922                                 NFSLOCKMNT(nmp);
1923                                 if (!NFSHASWRITEVERF(nmp)) {
1924                                         NFSBCOPY((caddr_t)tl,
1925                                             (caddr_t)&nmp->nm_verf[0],
1926                                             NFSX_VERF);
1927                                         NFSSETWRITEVERF(nmp);
1928                                 } else if (NFSBCMP(tl, nmp->nm_verf,
1929                                     NFSX_VERF)) {
1930                                         *must_commit = 1;
1931                                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
1932                                 }
1933                                 NFSUNLOCKMNT(nmp);
1934                         }
                             /*
                              * NOTE(review): the two words skipped for NFSv4 are
                              * presumably the Getattr op number and its status -
                              * confirm against the reply format.
                              */
1935                         if (nd->nd_flag & ND_NFSV4)
1936                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1937                         if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
1938                                 error = nfsm_loadattr(nd, nap);
1939                                 if (!error)
1940                                         *attrflagp = NFS_LATTR_NOSHRINK;
1941                         }
1942                 } else {
1943                         error = nd->nd_repstat;
1944                 }
1945                 if (error)
1946                         goto nfsmout;
1947                 NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
1948                 m_freem(nd->nd_mrep);
1949                 nd->nd_mrep = NULL;
1950                 tsiz -= len;
1951         }
1952 nfsmout:
1953         if (nd->nd_mrep != NULL)
1954                 m_freem(nd->nd_mrep);
1955         *iomode = committed;
1956         if (nd->nd_repstat && !error)
1957                 error = nd->nd_repstat;
1958         return (error);
1959 }
1960
1961 /*
1962  * nfs mknod rpc
1963  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1964  * mode set to specify the file type and the size field for rdev.
      * Creates name (namelen bytes) of type vtyp in directory dvp.
      * Returns ENAMETOOLONG without doing an RPC when namelen exceeds
      * NFS_MAXNAMLEN.
      * *nfhpp is set to NULL and both attribute flags are cleared on entry.
      * When the reply has no error status, nfscl_mtofh() is called with
      * nfhpp, nnap and attrflagp to get the new object's file handle and
      * attributes. dnap/dattrflagp are passed to nfscl_wcc_data() for the
      * directory (NFSv3 and NFSv4 only).
      * Returns the request/parse error if there is one, otherwise nd_repstat.
1965  */
1966 int
1967 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1968     u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
1969     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1970     int *attrflagp, int *dattrflagp, void *dstuff)
1971 {
1972         u_int32_t *tl;
1973         int error = 0;
1974         struct nfsrv_descript nfsd, *nd = &nfsd;
1975         nfsattrbit_t attrbits;
1976
1977         *nfhpp = NULL;
1978         *attrflagp = 0;
1979         *dattrflagp = 0;
1980         if (namelen > NFS_MAXNAMLEN)
1981                 return (ENAMETOOLONG);
1982         NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp);
             /*
              * NFSv4: the type goes before the name, followed by the
              * major/minor numbers for block and character devices.
              */
1983         if (nd->nd_flag & ND_NFSV4) {
1984                 if (vtyp == VBLK || vtyp == VCHR) {
1985                         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1986                         *tl++ = vtonfsv34_type(vtyp);
1987                         *tl++ = txdr_unsigned(NFSMAJOR(rdev));
1988                         *tl = txdr_unsigned(NFSMINOR(rdev));
1989                 } else {
1990                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1991                         *tl = vtonfsv34_type(vtyp);
1992                 }
1993         }
1994         (void) nfsm_strtom(nd, name, namelen);
             /* NFSv3: the type goes after the name. */
1995         if (nd->nd_flag & ND_NFSV3) {
1996                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1997                 *tl = vtonfsv34_type(vtyp);
1998         }
1999         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2000                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
             /* NFSv3: the major/minor numbers follow the attributes. */
2001         if ((nd->nd_flag & ND_NFSV3) &&
2002             (vtyp == VCHR || vtyp == VBLK)) {
2003                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2004                 *tl++ = txdr_unsigned(NFSMAJOR(rdev));
2005                 *tl = txdr_unsigned(NFSMINOR(rdev));
2006         }
2007         if (nd->nd_flag & ND_NFSV4) {
                     /* Append GetFH and Getattr ops for the new object. */
2008                 NFSGETATTR_ATTRBIT(&attrbits);
2009                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2010                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2011                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2012                 (void) nfsrv_putattrbit(nd, &attrbits);
2013         }
             /* NFSv2: rdev is passed along with the NFSSATTR_SIZERDEV flag. */
2014         if (nd->nd_flag & ND_NFSV2)
2015                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
2016         error = nfscl_request(nd, dvp, p, cred, dstuff);
2017         if (error)
2018                 return (error);
             /*
              * NOTE(review): the result of this call is not checked before the
              * parsing below; when nd_repstat is 0 it gets overwritten by
              * nfsrv_getattrbits(). Confirm that this is intended.
              */
2019         if (nd->nd_flag & ND_NFSV4)
2020                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2021         if (!nd->nd_repstat) {
2022                 if (nd->nd_flag & ND_NFSV4) {
                             /*
                              * Skip five words, then parse an attribute bitmap.
                              * The bits land in attrbits, which is not read
                              * afterwards.
                              */
2023                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2024                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2025                         if (error)
2026                                 goto nfsmout;
2027                 }
2028                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2029                 if (error)
2030                         goto nfsmout;
2031         }
             /* For NFSv3 the directory's wcc data comes last in the reply. */
2032         if (nd->nd_flag & ND_NFSV3)
2033                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2034         if (!error && nd->nd_repstat)
2035                 error = nd->nd_repstat;
2036 nfsmout:
2037         m_freem(nd->nd_mrep);
2038         return (error);
2039 }
2040
2041 /*
2042  * nfs file create call
2043  * Mostly just call the appropriate routine. (I separated out v4, so that
2044  * error recovery wouldn't be as difficult.)
2045  */
2046 int
2047 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2048     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2049     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2050     int *attrflagp, int *dattrflagp, void *dstuff)
2051 {
2052         int error = 0, newone, expireret = 0, retrycnt, unlocked;
2053         struct nfsclowner *owp;
2054         struct nfscldeleg *dp;
2055         struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2056         u_int32_t clidrev;
2057
2058         if (NFSHASNFSV4(nmp)) {
2059             retrycnt = 0;
2060             do {
2061                 dp = NULL;
2062                 error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
2063                     NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
2064                     NULL, 1);
2065                 if (error)
2066                         return (error);
2067                 if (nmp->nm_clp != NULL)
2068                         clidrev = nmp->nm_clp->nfsc_clientidrev;
2069                 else
2070                         clidrev = 0;
2071                 if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
2072                     nfs_numnfscbd == 0 || retrycnt > 0)
2073                         error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
2074                           fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2075                           attrflagp, dattrflagp, dstuff, &unlocked);
2076                 else
2077                         error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
2078                           cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2079                           attrflagp, dattrflagp, dstuff, &unlocked);
2080                 /*
2081                  * There is no need to invalidate cached attributes here,
2082                  * since new post-delegation issue attributes are always
2083                  * returned by nfsrpc_createv4() and these will update the
2084                  * attribute cache.
2085                  */
2086                 if (dp != NULL)
2087                         (void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
2088                             (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
2089                 nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
2090                 if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2091                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2092                     error == NFSERR_BADSESSION) {
2093                         (void) nfs_catnap(PZERO, error, "nfs_open");
2094                 } else if ((error == NFSERR_EXPIRED ||
2095                     error == NFSERR_BADSTATEID) && clidrev != 0) {
2096                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2097                         retrycnt++;
2098                 }
2099             } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2100                 error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2101                 error == NFSERR_BADSESSION ||
2102                 ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2103                  expireret == 0 && clidrev != 0 && retrycnt < 4));
2104             if (error && retrycnt >= 4)
2105                     error = EIO;
2106         } else {
2107                 error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
2108                     fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
2109                     dstuff);
2110         }
2111         return (error);
2112 }
2113
2114 /*
2115  * The create rpc for v2 and 3.
      *
      * *nfhpp, *attrflagp and *dattrflagp are cleared before anything else
      * is done.  On a reply with nd_repstat == 0 the new file's handle and
      * attributes are loaded through nfscl_mtofh(); for NFSv3 the directory
      * attributes are loaded through nfscl_wcc_data().
      *
      * Returns ENAMETOOLONG when namelen exceeds NFS_MAXNAMLEN, the error
      * from nfscl_request() when that fails, otherwise a reply parsing error
      * or, if there was none, nd_repstat.
2116  */
2117 static int
2118 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2119     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2120     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2121     int *attrflagp, int *dattrflagp, void *dstuff)
2122 {
2123         u_int32_t *tl;
2124         int error = 0;
2125         struct nfsrv_descript nfsd, *nd = &nfsd;
2126
2127         *nfhpp = NULL;
2128         *attrflagp = 0;
2129         *dattrflagp = 0;
2130         if (namelen > NFS_MAXNAMLEN)
2131                 return (ENAMETOOLONG);
2132         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2133         (void) nfsm_strtom(nd, name, namelen);
2134         if (nd->nd_flag & ND_NFSV3) {
                     /*
                      * NFSv3: one word for the create mode.  An O_EXCL create
                      * sends NFSCREATE_EXCLUSIVE followed by the two words of
                      * cverf and no attributes; otherwise NFSCREATE_UNCHECKED
                      * is followed by the attributes from nfscl_fillsattr().
                      */
2135                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2136                 if (fmode & O_EXCL) {
2137                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2138                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2139                         *tl++ = cverf.lval[0];
2140                         *tl = cverf.lval[1];
2141                 } else {
2142                         *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2143                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
2144                 }
2145         } else {
                     /*
                      * Not NFSv3 (presumably NFSv2, given this routine's
                      * purpose): only attributes are sent, with the
                      * NFSSATTR_SIZE0 flag.
                      */
2146                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2147         }
2148         error = nfscl_request(nd, dvp, p, cred, dstuff);
             /*
              * This path returns without the m_freem() done at nfsmout.
              * NOTE(review): presumably there is no reply mbuf to free when
              * nfscl_request() fails -- confirm.
              */
2149         if (error)
2150                 return (error);
2151         if (nd->nd_repstat == 0) {
2152                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2153                 if (error)
2154                         goto nfsmout;
2155         }
2156         if (nd->nd_flag & ND_NFSV3)
2157                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
             /* A parsing error takes precedence over the server's status. */
2158         if (nd->nd_repstat != 0 && error == 0)
2159                 error = nd->nd_repstat;
2160 nfsmout:
2161         m_freem(nd->nd_mrep);
2162         return (error);
2163 }
2164
2165 static int
2166 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2167     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2168     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2169     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2170     int *dattrflagp, void *dstuff, int *unlockedp)
2171 {
             /*
              * The create rpc for NFSv4, which is done as an Open operation
              * with NFSV4OPEN_CREATE.  The request built below is: the Open
              * arguments, GetFH and Getattr for the new file, then PutFH and
              * Getattr for the directory.
              *
              * On success the new file's handle and attributes are returned
              * via nfhpp, nnap and attrflagp, the directory attributes via
              * dnap with *dattrflagp set to 1, and any delegation that was
              * received via dpp (NULL when there was none).  *unlockedp is
              * set to 1 once nfscl_openrelease() has been called.
              *
              * On any error return, a delegation structure allocated here has
              * been freed and *dpp is left as NULL.
              */
2172         u_int32_t *tl;
2173         int error = 0, deleg, newone, ret, acesize, limitby;
2174         struct nfsrv_descript nfsd, *nd = &nfsd;
2175         struct nfsclopen *op;
2176         struct nfscldeleg *dp = NULL;
2177         struct nfsnode *np;
2178         struct nfsfh *nfhp;
2179         nfsattrbit_t attrbits;
2180         nfsv4stateid_t stateid;
2181         u_int32_t rflags;
2182         struct nfsmount *nmp;
2183         struct nfsclsession *tsep;
2184
2185         nmp = VFSTONFS(dvp->v_mount);
2186         np = VTONFS(dvp);
2187         *unlockedp = 0;
2188         *nfhpp = NULL;
2189         *dpp = NULL;
2190         *attrflagp = 0;
2191         *dattrflagp = 0;
2192         if (namelen > NFS_MAXNAMLEN)
2193                 return (ENAMETOOLONG);
2194         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2195         /*
2196          * For V4, this is actually an Open op.
2197          */
             /*
              * Five words: the owner's seqid, the share access bits, the
              * share deny value and the two words of the clientid taken from
              * the session returned by nfsmnt_mdssession().  The owner name
              * follows as a string.
              */
2198         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2199         *tl++ = txdr_unsigned(owp->nfsow_seqid);
2200         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2201             NFSV4OPEN_ACCESSREAD);
2202         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2203         tsep = nfsmnt_mdssession(nmp);
2204         *tl++ = tsep->nfsess_clientid.lval[0];
2205         *tl = tsep->nfsess_clientid.lval[1];
2206         (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
             /*
              * Two words: NFSV4OPEN_CREATE and then the create mode, which
              * is chosen below from fmode and the mount's flags.
              */
2207         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2208         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2209         if (fmode & O_EXCL) {
2210                 if (NFSHASNFSV4N(nmp)) {
2211                         if (NFSHASSESSPERSIST(nmp)) {
2212                                 /* Use GUARDED for persistent sessions. */
2213                                 *tl = txdr_unsigned(NFSCREATE_GUARDED);
2214                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2215                         } else {
2216                                 /* Otherwise, use EXCLUSIVE4_1. */
                                     /* Both the verifier and attributes are sent. */
2217                                 *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2218                                 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2219                                 *tl++ = cverf.lval[0];
2220                                 *tl = cverf.lval[1];
2221                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2222                         }
2223                 } else {
2224                         /* NFSv4.0 */
                             /* Only the verifier is sent, no attributes. */
2225                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2226                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2227                         *tl++ = cverf.lval[0];
2228                         *tl = cverf.lval[1];
2229                 }
2230         } else {
2231                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2232                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2233         }
             /* The claim type, followed by the name of the file to create. */
2234         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2235         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2236         (void) nfsm_strtom(nd, name, namelen);
2237         /* Get the new file's handle and attributes. */
2238         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2239         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2240         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2241         NFSGETATTR_ATTRBIT(&attrbits);
2242         (void) nfsrv_putattrbit(nd, &attrbits);
2243         /* Get the directory's post-op attributes. */
2244         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2245         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2246         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2247         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2248         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2249         (void) nfsrv_putattrbit(nd, &attrbits);
2250         error = nfscl_request(nd, dvp, p, cred, dstuff);
2251         if (error)
2252                 return (error);
2253         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2254         if (nd->nd_repstat == 0) {
                     /*
                      * The open stateid is copied out of the first four words.
                      * tl has then advanced three words, so *(tl + 6) is the
                      * last word of this dissected area (assuming
                      * NFSX_STATEID covers exactly the four words just
                      * copied); it holds the result flags.
                      */
2255                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2256                     6 * NFSX_UNSIGNED);
2257                 stateid.seqid = *tl++;
2258                 stateid.other[0] = *tl++;
2259                 stateid.other[1] = *tl++;
2260                 stateid.other[2] = *tl;
2261                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
                     /*
                      * NOTE(review): the return value is discarded here;
                      * confirm that a parse failure is caught by the dissects
                      * that follow.
                      */
2262                 (void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
                     /* Next word is the delegation type. */
2263                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2264                 deleg = fxdr_unsigned(int, *tl);
2265                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
2266                     deleg == NFSV4OPEN_DELEGATEWRITE) {
                             /*
                              * Set both FIRSTDELEG and GOTDELEG on the client
                              * unless FIRSTDELEG is already set.
                              */
2267                         if (!(owp->nfsow_clp->nfsc_flags &
2268                               NFSCLFLAGS_FIRSTDELEG))
2269                                 owp->nfsow_clp->nfsc_flags |=
2270                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
                             /*
                              * Allocate the delegation with room for a file
                              * handle of up to NFSX_V4FHMAX bytes after it.
                              * From here on, every error exit goes through
                              * nfsmout, which frees dp.
                              */
2271                         dp = malloc(
2272                             sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2273                             M_NFSCLDELEG, M_WAITOK);
2274                         LIST_INIT(&dp->nfsdl_owner);
2275                         LIST_INIT(&dp->nfsdl_lock);
2276                         dp->nfsdl_clp = owp->nfsow_clp;
2277                         newnfs_copyincred(cred, &dp->nfsdl_cred);
2278                         nfscl_lockinit(&dp->nfsdl_rwlock);
                             /*
                              * The delegation stateid plus one more word; a
                              * non-zero value of that word sets
                              * NFSCLDL_RECALL below.
                              */
2279                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2280                             NFSX_UNSIGNED);
2281                         dp->nfsdl_stateid.seqid = *tl++;
2282                         dp->nfsdl_stateid.other[0] = *tl++;
2283                         dp->nfsdl_stateid.other[1] = *tl++;
2284                         dp->nfsdl_stateid.other[2] = *tl++;
2285                         ret = fxdr_unsigned(int, *tl);
2286                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2287                                 dp->nfsdl_flags = NFSCLDL_WRITE;
2288                                 /*
2289                                  * Indicates how much the file can grow.
2290                                  */
                                     /*
                                      * Three words: the limit type, then
                                      * either one 64-bit size or two 32-bit
                                      * values that are multiplied together.
                                      * Any other limit type is a bad reply.
                                      */
2291                                 NFSM_DISSECT(tl, u_int32_t *,
2292                                     3 * NFSX_UNSIGNED);
2293                                 limitby = fxdr_unsigned(int, *tl++);
2294                                 switch (limitby) {
2295                                 case NFSV4OPEN_LIMITSIZE:
2296                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
2297                                         break;
2298                                 case NFSV4OPEN_LIMITBLOCKS:
2299                                         dp->nfsdl_sizelimit =
2300                                             fxdr_unsigned(u_int64_t, *tl++);
2301                                         dp->nfsdl_sizelimit *=
2302                                             fxdr_unsigned(u_int64_t, *tl);
2303                                         break;
2304                                 default:
2305                                         error = NFSERR_BADXDR;
2306                                         goto nfsmout;
2307                                 }
2308                         } else {
2309                                 dp->nfsdl_flags = NFSCLDL_READ;
2310                         }
2311                         if (ret)
2312                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
                             /* ret is reused here as an output argument. */
2313                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
2314                             &acesize, p);
2315                         if (error)
2316                                 goto nfsmout;
2317                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
2318                         error = NFSERR_BADXDR;
2319                         goto nfsmout;
2320                 }
                     /* The new file's handle and attributes. */
2321                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2322                 if (error)
2323                         goto nfsmout;
2324                 /* Get rid of the PutFH and Getattr status values. */
2325                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2326                 /* Load the directory attributes. */
2327                 error = nfsm_loadattr(nd, dnap);
2328                 if (error)
2329                         goto nfsmout;
2330                 *dattrflagp = 1;
                     /*
                      * With both a delegation and attributes for the new
                      * file, record its change value and modify time in the
                      * delegation.
                      */
2331                 if (dp != NULL && *attrflagp) {
2332                         dp->nfsdl_change = nnap->na_filerev;
2333                         dp->nfsdl_modtime = nnap->na_mtime;
2334                         dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2335                 }
2336                 /*
2337                  * We can now complete the Open state.
2338                  */
2339                 nfhp = *nfhpp;
2340                 if (dp != NULL) {
2341                         dp->nfsdl_fhlen = nfhp->nfh_len;
2342                         NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2343                 }
2344                 /*
2345                  * Get an Open structure that will be
2346                  * attached to the OpenOwner, acquired already.
2347                  */
2348                 error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
2349                     (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2350                     cred, p, NULL, &op, &newone, NULL, 0);
2351                 if (error)
2352                         goto nfsmout;
2353                 op->nfso_stateid = stateid;
2354                 newnfs_copyincred(cred, &op->nfso_cred);
                     /*
                      * The result flags ask for a confirm: do the OpenConfirm,
                      * repeating it for as long as it returns NFSERR_DELAY.
                      * Its final status becomes this function's error.
                      */
2355                 if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2356                     do {
2357                         ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2358                             nfhp->nfh_len, op, cred, p);
2359                         if (ret == NFSERR_DELAY)
2360                             (void) nfs_catnap(PZERO, ret, "nfs_create");
2361                     } while (ret == NFSERR_DELAY);
2362                     error = ret;
2363                 }
2364
2365                 /*
2366                  * If the server is handing out delegations, but we didn't
2367                  * get one because an OpenConfirm was required, try the
2368                  * Open again, to get a delegation. This is a harmless no-op,
2369                  * from a server's point of view.
2370                  */
2371                 if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2372                     (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2373                     !error && dp == NULL) {
2374                     do {
2375                         ret = nfsrpc_openrpc(VFSTONFS(dvp->v_mount), dvp,
2376                             np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2377                             nfhp->nfh_fh, nfhp->nfh_len,
2378                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2379                             name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2380                         if (ret == NFSERR_DELAY)
2381                             (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2382                     } while (ret == NFSERR_DELAY);
                         /*
                          * A failure of this extra Open discards any
                          * delegation it set, but only the three errors
                          * listed below are passed on as this function's
                          * error.
                          */
2383                     if (ret) {
2384                         if (dp != NULL) {
2385                                 free(dp, M_NFSCLDELEG);
2386                                 dp = NULL;
2387                         }
2388                         if (ret == NFSERR_STALECLIENTID ||
2389                             ret == NFSERR_STALEDONTRECOVER ||
2390                             ret == NFSERR_BADSESSION)
2391                                 error = ret;
2392                     }
2393                 }
2394                 nfscl_openrelease(nmp, op, error, newone);
2395                 *unlockedp = 1;
2396         }
             /* A local error takes precedence over the server's status. */
2397         if (nd->nd_repstat != 0 && error == 0)
2398                 error = nd->nd_repstat;
2399         if (error == NFSERR_STALECLIENTID)
2400                 nfscl_initiate_recovery(owp->nfsow_clp);
2401 nfsmout:
             /*
              * The delegation is handed to the caller only on success;
              * otherwise it is freed here.
              */
2402         if (!error)
2403                 *dpp = dp;
2404         else if (dp != NULL)
2405                 free(dp, M_NFSCLDELEG);
2406         m_freem(nd->nd_mrep);
2407         return (error);
2408 }
2409
2410 /*
2411  * Nfs remove rpc
      *
      * Removes "name" from directory dvp.  For an NFSv4 mount,
      * nfscl_removedeleg() is called for vp first; when it returns 1 the
      * request is started as NFSPROC_RETDELEGREMOVE and carries the
      * delegation stateid, a PutFH of the directory and the Remove op.
      * If that request fails with ND_NOMOREDATA set, it is redone as a
      * plain NFSPROC_REMOVE.
      *
      * *dattrflagp is cleared on entry; for NFSv3 and NFSv4 the directory
      * attributes are loaded through nfscl_wcc_data().
      * Returns ENAMETOOLONG when namelen exceeds NFS_MAXNAMLEN, the error
      * from nfscl_request(), a reply parsing error, or nd_repstat.
2412  */
2413 int
2414 nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2415     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp,
2416     void *dstuff)
2417 {
2418         u_int32_t *tl;
2419         struct nfsrv_descript nfsd, *nd = &nfsd;
2420         struct nfsnode *np;
2421         struct nfsmount *nmp;
2422         nfsv4stateid_t dstateid;
2423         int error, ret = 0, i;
2424
2425         *dattrflagp = 0;
2426         if (namelen > NFS_MAXNAMLEN)
2427                 return (ENAMETOOLONG);
2428         nmp = VFSTONFS(dvp->v_mount);
2429 tryagain:
             /*
              * ret is 0 on the first pass.  When this label is reached again
              * after a failed attempt, ret is non-zero, so the else branch is
              * taken, ret is reset to 0 and a plain Remove is built.
              */
2430         if (NFSHASNFSV4(nmp) && ret == 0) {
                     /*
                      * NOTE(review): only return values 0 and 1 lead to a
                      * NFSCL_REQSTART() below -- presumably those are the only
                      * values nfscl_removedeleg() returns; confirm.
                      */
2431                 ret = nfscl_removedeleg(vp, p, &dstateid);
2432                 if (ret == 1) {
2433                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp);
                             /*
                              * The four stateid words (seqid sent as 0 when
                              * NFSHASNFSV4N() is true) followed by the PutFH
                              * op number.
                              */
2434                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2435                             NFSX_UNSIGNED);
2436                         if (NFSHASNFSV4N(nmp))
2437                                 *tl++ = 0;
2438                         else
2439                                 *tl++ = dstateid.seqid;
2440                         *tl++ = dstateid.other[0];
2441                         *tl++ = dstateid.other[1];
2442                         *tl++ = dstateid.other[2];
2443                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
                             /* PutFH argument: the directory's file handle. */
2444                         np = VTONFS(dvp);
2445                         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2446                             np->n_fhp->nfh_len, 0);
2447                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2448                         *tl = txdr_unsigned(NFSV4OP_REMOVE);
2449                 }
2450         } else {
2451                 ret = 0;
2452         }
2453         if (ret == 0)
2454                 NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp);
             /* Both request forms end with the name to remove. */
2455         (void) nfsm_strtom(nd, name, namelen);
2456         error = nfscl_request(nd, dvp, p, cred, dstuff);
2457         if (error)
2458                 return (error);
2459         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2460                 /* For NFSv4, parse out any Delegreturn replies. */
2461                 if (ret > 0 && nd->nd_repstat != 0 &&
2462                     (nd->nd_flag & ND_NOMOREDATA)) {
2463                         /*
2464                          * If the Delegreturn failed, try again without
2465                          * it. The server will Recall, as required.
2466                          */
2467                         m_freem(nd->nd_mrep);
2468                         goto tryagain;
2469                 }
                     /*
                      * Step over ret * 2 two-word results that precede the
                      * Remove result (presumably one for the Delegreturn and
                      * one for the PutFH -- confirm).  A non-zero second word
                      * stops further parsing by setting ND_NOMOREDATA.
                      */
2470                 for (i = 0; i < (ret * 2); i++) {
2471                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2472                             ND_NFSV4) {
2473                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2474                             if (*(tl + 1))
2475                                 nd->nd_flag |= ND_NOMOREDATA;
2476                         }
2477                 }
2478                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2479         }
             /* A parsing error takes precedence over the server's status. */
2480         if (nd->nd_repstat && !error)
2481                 error = nd->nd_repstat;
2482 nfsmout:
2483         m_freem(nd->nd_mrep);
2484         return (error);
2485 }
2486
2487 /*
2488  * Do an nfs rename rpc.
      *
      * Renames fnameptr in directory fdvp to tnameptr in directory tdvp.
      * For an NFSv4 mount, nfscl_renamedeleg() is called for fvp and tvp
      * first and any delegation stateids it reports (gotfd/gottd) are sent
      * ahead of the Rename in the same request.  If a DelegReturn in that
      * request fails, the request is redone as a plain NFSPROC_RENAME.
      *
      * *fattrflagp and *tattrflagp are cleared on entry; for NFSv3 and NFSv4
      * the attributes of the two directories are loaded through
      * nfscl_wcc_data() into fnap and tnap.
      * Returns ENAMETOOLONG when either name exceeds NFS_MAXNAMLEN, the
      * error from nfscl_request(), a reply parsing error, or nd_repstat.
2489  */
2490 int
2491 nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
2492     vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
2493     NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
2494     int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff)
2495 {
2496         u_int32_t *tl;
2497         struct nfsrv_descript nfsd, *nd = &nfsd;
2498         struct nfsmount *nmp;
2499         struct nfsnode *np;
2500         nfsattrbit_t attrbits;
2501         nfsv4stateid_t fdstateid, tdstateid;
2502         int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
2503         
2504         *fattrflagp = 0;
2505         *tattrflagp = 0;
2506         nmp = VFSTONFS(fdvp->v_mount);
2507         if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
2508                 return (ENAMETOOLONG);
2509 tryagain:
             /*
              * ret is 0 on the first pass.  When this label is reached again
              * after a failed attempt, ret is non-zero, so the else branch is
              * taken, ret is reset to 0 and a plain Rename is built.
              */
2510         if (NFSHASNFSV4(nmp) && ret == 0) {
                     /*
                      * ret presumably counts the delegations to return (the
                      * reply parsing below treats ret > 1 as two DelegReturn
                      * results) -- confirm against nfscl_renamedeleg().
                      */
2511                 ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
2512                     &tdstateid, &gottd, p);
                     /*
                      * Start the request on whichever vnode's stateid is sent
                      * first: fvp when gotfd is set, otherwise tvp.
                      */
2513                 if (gotfd && gottd) {
2514                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp);
2515                 } else if (gotfd) {
2516                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp);
2517                 } else if (gottd) {
2518                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp);
2519                 }
2520                 if (gotfd) {
                             /*
                              * Stateid for fvp; the seqid is sent as 0 when
                              * NFSHASNFSV4N() is true.
                              */
2521                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2522                         if (NFSHASNFSV4N(nmp))
2523                                 *tl++ = 0;
2524                         else
2525                                 *tl++ = fdstateid.seqid;
2526                         *tl++ = fdstateid.other[0];
2527                         *tl++ = fdstateid.other[1];
2528                         *tl = fdstateid.other[2];
                             /*
                              * A second stateid follows: add PutFH of tvp and
                              * the DelegReturn op number ahead of it.
                              */
2529                         if (gottd) {
2530                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2531                                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2532                                 np = VTONFS(tvp);
2533                                 (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2534                                     np->n_fhp->nfh_len, 0);
2535                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2536                                 *tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
2537                         }
2538                 }
2539                 if (gottd) {
                             /* Stateid for tvp, built the same way. */
2540                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2541                         if (NFSHASNFSV4N(nmp))
2542                                 *tl++ = 0;
2543                         else
2544                                 *tl++ = tdstateid.seqid;
2545                         *tl++ = tdstateid.other[0];
2546                         *tl++ = tdstateid.other[1];
2547                         *tl = tdstateid.other[2];
2548                 }
                     /*
                      * After the stateid(s): PutFH of the "from" directory and
                      * SaveFH.
                      */
2549                 if (ret > 0) {
2550                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2551                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2552                         np = VTONFS(fdvp);
2553                         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2554                             np->n_fhp->nfh_len, 0);
2555                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2556                         *tl = txdr_unsigned(NFSV4OP_SAVEFH);
2557                 }
2558         } else {
2559                 ret = 0;
2560         }
2561         if (ret == 0)
2562                 NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp);
2563         if (nd->nd_flag & ND_NFSV4) {
                     /*
                      * NFSv4: Getattr with the NFSWCCATTR_ATTRBIT() bits,
                      * PutFH of the "to" directory, the same Getattr again,
                      * and then the Rename op itself.
                      */
2564                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2565                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2566                 NFSWCCATTR_ATTRBIT(&attrbits);
2567                 (void) nfsrv_putattrbit(nd, &attrbits);
2568                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2569                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2570                 (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2571                     VTONFS(tdvp)->n_fhp->nfh_len, 0);
2572                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2573                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2574                 (void) nfsrv_putattrbit(nd, &attrbits);
2575                 nd->nd_flag |= ND_V4WCCATTR;
2576                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2577                 *tl = txdr_unsigned(NFSV4OP_RENAME);
2578         }
             /*
              * The "from" name, then the "to" name.  When the request is not
              * NFSv4, the "to" directory's file handle goes between them.
              */
2579         (void) nfsm_strtom(nd, fnameptr, fnamelen);
2580         if (!(nd->nd_flag & ND_NFSV4))
2581                 (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2582                         VTONFS(tdvp)->n_fhp->nfh_len, 0);
2583         (void) nfsm_strtom(nd, tnameptr, tnamelen);
2584         error = nfscl_request(nd, fdvp, p, cred, fstuff);
2585         if (error)
2586                 return (error);
2587         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2588                 /* For NFSv4, parse out any Delegreturn replies. */
2589                 if (ret > 0 && nd->nd_repstat != 0 &&
2590                     (nd->nd_flag & ND_NOMOREDATA)) {
2591                         /*
2592                          * If the Delegreturn failed, try again without
2593                          * it. The server will Recall, as required.
2594                          */
2595                         m_freem(nd->nd_mrep);
2596                         goto tryagain;
2597                 }
                     /*
                      * Step over ret * 2 two-word results that precede the
                      * first Getattr result.  A non-zero second word either
                      * restarts the request (see below) or stops further
                      * parsing by setting ND_NOMOREDATA.
                      */
2598                 for (i = 0; i < (ret * 2); i++) {
2599                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2600                             ND_NFSV4) {
2601                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2602                             if (*(tl + 1)) {
2603                                 if (i == 0 && ret > 1) {
2604                                     /*
2605                                      * If the Delegreturn failed, try again
2606                                      * without it. The server will Recall, as
2607                                      * required.
2608                                      * If ret > 1, the first iteration of this
2609                                      * loop is the second DelegReturn result.
2610                                      */
2611                                     m_freem(nd->nd_mrep);
2612                                     goto tryagain;
2613                                 } else {
2614                                     nd->nd_flag |= ND_NOMOREDATA;
2615                                 }
2616                             }
2617                         }
2618                 }
2619                 /* Now, the first wcc attribute reply. */
2620                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2621                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2622                         if (*(tl + 1))
2623                                 nd->nd_flag |= ND_NOMOREDATA;
2624                 }
2625                 error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL,
2626                     fstuff);
2627                 /* and the second wcc attribute reply. */
2628                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
2629                     !error) {
2630                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2631                         if (*(tl + 1))
2632                                 nd->nd_flag |= ND_NOMOREDATA;
2633                 }
2634                 if (!error)
2635                         error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
2636                             NULL, tstuff);
2637         }
             /* A parsing error takes precedence over the server's status. */
2638         if (nd->nd_repstat && !error)
2639                 error = nd->nd_repstat;
2640 nfsmout:
2641         m_freem(nd->nd_mrep);
2642         return (error);
2643 }
2644
2645 /*
2646  * nfs hard link create rpc
      *
      * Creates the link "name" in directory dvp referring to vp.  The
      * request is started on, and issued against, vp; the directory's file
      * handle is added to the request with nfsm_fhtom().
      *
      * *attrflagp and *dattrflagp are cleared on entry.  For NFSv3, nap is
      * loaded through nfscl_postop_attr() and dnap through nfscl_wcc_data();
      * for NFSv4 only dnap is loaded by this function.
      * Returns ENAMETOOLONG when namelen exceeds NFS_MAXNAMLEN, the error
      * from nfscl_request(), a reply parsing error, or nd_repstat.
2647  */
2648 int
2649 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
2650     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2651     struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff)
2652 {
2653         u_int32_t *tl;
2654         struct nfsrv_descript nfsd, *nd = &nfsd;
2655         nfsattrbit_t attrbits;
2656         int error = 0;
2657
2658         *attrflagp = 0;
2659         *dattrflagp = 0;
2660         if (namelen > NFS_MAXNAMLEN)
2661                 return (ENAMETOOLONG);
2662         NFSCL_REQSTART(nd, NFSPROC_LINK, vp);
             /* NFSv4 needs a PutFH op ahead of the directory's file handle. */
2663         if (nd->nd_flag & ND_NFSV4) {
2664                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2665                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2666         }
2667         (void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh,
2668                 VTONFS(dvp)->n_fhp->nfh_len, 0);
2669         if (nd->nd_flag & ND_NFSV4) {
                     /*
                      * NFSv4: Getattr with the NFSWCCATTR_ATTRBIT() bits and
                      * then the Link op.
                      */
2670                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2671                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2672                 NFSWCCATTR_ATTRBIT(&attrbits);
2673                 (void) nfsrv_putattrbit(nd, &attrbits);
2674                 nd->nd_flag |= ND_V4WCCATTR;
2675                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2676                 *tl = txdr_unsigned(NFSV4OP_LINK);
2677         }
2678         (void) nfsm_strtom(nd, name, namelen);
2679         error = nfscl_request(nd, vp, p, cred, dstuff);
2680         if (error)
2681                 return (error);
2682         if (nd->nd_flag & ND_NFSV3) {
2683                 error = nfscl_postop_attr(nd, nap, attrflagp, dstuff);
2684                 if (!error)
2685                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2686                             NULL, dstuff);
2687         } else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2688                 /*
2689                  * First, parse out the PutFH and Getattr result.
2690                  */
                     /*
                      * Each result is two words.  The second pair is only
                      * dissected when the second word of the first pair is 0;
                      * a non-zero second word in the last pair read stops
                      * further parsing by setting ND_NOMOREDATA.
                      */
2691                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2692                 if (!(*(tl + 1)))
2693                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2694                 if (*(tl + 1))
2695                         nd->nd_flag |= ND_NOMOREDATA;
2696                 /*
2697                  * Get the pre-op attributes.
2698                  */
2699                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2700         }
             /* A parsing error takes precedence over the server's status. */
2701         if (nd->nd_repstat && !error)
2702                 error = nd->nd_repstat;
2703 nfsmout:
2704         m_freem(nd->nd_mrep);
2705         return (error);
2706 }
2707
2708 /*
2709  * nfs symbolic link create rpc
2710  */
2711 int
2712 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, const char *target,
2713     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2714     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2715     int *dattrflagp, void *dstuff)
2716 {
2717         u_int32_t *tl;
2718         struct nfsrv_descript nfsd, *nd = &nfsd;
2719         struct nfsmount *nmp;
2720         int slen, error = 0;
2721
2722         *nfhpp = NULL;
2723         *attrflagp = 0;
2724         *dattrflagp = 0;
2725         nmp = VFSTONFS(dvp->v_mount);
2726         slen = strlen(target);
2727         if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
2728                 return (ENAMETOOLONG);
2729         NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp);
2730         if (nd->nd_flag & ND_NFSV4) {
2731                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2732                 *tl = txdr_unsigned(NFLNK);
2733                 (void) nfsm_strtom(nd, target, slen);
2734         }
2735         (void) nfsm_strtom(nd, name, namelen);
2736         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2737                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2738         if (!(nd->nd_flag & ND_NFSV4))
2739                 (void) nfsm_strtom(nd, target, slen);
2740         if (nd->nd_flag & ND_NFSV2)
2741                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2742         error = nfscl_request(nd, dvp, p, cred, dstuff);
2743         if (error)
2744                 return (error);
2745         if (nd->nd_flag & ND_NFSV4)
2746                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2747         if ((nd->nd_flag & ND_NFSV3) && !error) {
2748                 if (!nd->nd_repstat)
2749                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2750                 if (!error)
2751                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2752                             NULL, dstuff);
2753         }
2754         if (nd->nd_repstat && !error)
2755                 error = nd->nd_repstat;
2756         m_freem(nd->nd_mrep);
2757         /*
2758          * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2759          * Only do this if vfs.nfs.ignore_eexist is set.
2760          * Never do this for NFSv4.1 or later minor versions, since sessions
2761          * should guarantee "exactly once" RPC semantics.
2762          */
2763         if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2764             nmp->nm_minorvers == 0))
2765                 error = 0;
2766         return (error);
2767 }
2768
2769 /*
2770  * nfs make dir rpc
2771  */
2772 int
2773 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2774     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2775     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2776     int *dattrflagp, void *dstuff)
2777 {
2778         u_int32_t *tl;
2779         struct nfsrv_descript nfsd, *nd = &nfsd;
2780         nfsattrbit_t attrbits;
2781         int error = 0;
2782         struct nfsfh *fhp;
2783         struct nfsmount *nmp;
2784
2785         *nfhpp = NULL;
2786         *attrflagp = 0;
2787         *dattrflagp = 0;
2788         nmp = VFSTONFS(dvp->v_mount);
2789         fhp = VTONFS(dvp)->n_fhp;
2790         if (namelen > NFS_MAXNAMLEN)
2791                 return (ENAMETOOLONG);
2792         NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp);
2793         if (nd->nd_flag & ND_NFSV4) {
2794                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2795                 *tl = txdr_unsigned(NFDIR);
2796         }
2797         (void) nfsm_strtom(nd, name, namelen);
2798         nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2799         if (nd->nd_flag & ND_NFSV4) {
2800                 NFSGETATTR_ATTRBIT(&attrbits);
2801                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2802                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2803                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2804                 (void) nfsrv_putattrbit(nd, &attrbits);
2805                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2806                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2807                 (void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0);
2808                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2809                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2810                 (void) nfsrv_putattrbit(nd, &attrbits);
2811         }
2812         error = nfscl_request(nd, dvp, p, cred, dstuff);
2813         if (error)
2814                 return (error);
2815         if (nd->nd_flag & ND_NFSV4)
2816                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2817         if (!nd->nd_repstat && !error) {
2818                 if (nd->nd_flag & ND_NFSV4) {
2819                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2820                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2821                 }
2822                 if (!error)
2823                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2824                 if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
2825                         /* Get rid of the PutFH and Getattr status values. */
2826                         NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2827                         /* Load the directory attributes. */
2828                         error = nfsm_loadattr(nd, dnap);
2829                         if (error == 0)
2830                                 *dattrflagp = 1;
2831                 }
2832         }
2833         if ((nd->nd_flag & ND_NFSV3) && !error)
2834                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2835         if (nd->nd_repstat && !error)
2836                 error = nd->nd_repstat;
2837 nfsmout:
2838         m_freem(nd->nd_mrep);
2839         /*
2840          * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2841          * Only do this if vfs.nfs.ignore_eexist is set.
2842          * Never do this for NFSv4.1 or later minor versions, since sessions
2843          * should guarantee "exactly once" RPC semantics.
2844          */
2845         if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2846             nmp->nm_minorvers == 0))
2847                 error = 0;
2848         return (error);
2849 }
2850
2851 /*
2852  * nfs remove directory call
2853  */
2854 int
2855 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
2856     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff)
2857 {
2858         struct nfsrv_descript nfsd, *nd = &nfsd;
2859         int error = 0;
2860
2861         *dattrflagp = 0;
2862         if (namelen > NFS_MAXNAMLEN)
2863                 return (ENAMETOOLONG);
2864         NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp);
2865         (void) nfsm_strtom(nd, name, namelen);
2866         error = nfscl_request(nd, dvp, p, cred, dstuff);
2867         if (error)
2868                 return (error);
2869         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2870                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2871         if (nd->nd_repstat && !error)
2872                 error = nd->nd_repstat;
2873         m_freem(nd->nd_mrep);
2874         /*
2875          * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2876          */
2877         if (error == ENOENT)
2878                 error = 0;
2879         return (error);
2880 }
2881
2882 /*
2883  * Readdir rpc.
2884  * Always returns with either uio_resid unchanged, if you are at the
2885  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
2886  * filled in.
2887  * I felt this would allow caching of directory blocks more easily
2888  * than returning a partially filled block.
2889  * Directory offset cookies:
2890  * Oh my, what to do with them...
2891  * I can think of three ways to deal with them:
2892  * 1 - have the layer above these RPCs maintain a map between logical
2893  *     directory byte offsets and the NFS directory offset cookies
2894  * 2 - pass the opaque directory offset cookies up into userland
2895  *     and let the libc functions deal with them, via the system call
2896  * 3 - return them to userland in the "struct dirent", so future versions
2897  *     of libc can use them and do whatever is necessary to make things work
2898  *     above these rpc calls, in the meantime
2899  * For now, I do #3 by "hiding" the directory offset cookies after the
2900  * d_name field in struct dirent. This is space inside d_reclen that
2901  * will be ignored by anything that doesn't know about them.
2902  * The directory offset cookies are filled in as the last 8 bytes of
2903  * each directory entry, after d_name. Someday, the userland libc
2904  * functions may be able to use these. In the meantime, it satisfies
2905  * OpenBSD's requirements for cookies being returned.
2906  * It expects the directory offset cookie for the read to be in uio_offset
2907  * and returns the one for the next entry after this directory block in
2908  * there, as well.
      * NOTE(review): the code below takes the starting cookie from *cookiep
      * and only compares uio_offset against 0 (to decide whether the NFSv4
      * "." and ".." entries must be generated); confirm against the callers.
      *
      * Arguments:
      * vp - the directory being read
      * uiop - destination buffer; it must describe a single iovec and
      *     uio_resid must be a multiple of DIRBLKSIZ (asserted below)
      * cookiep - in: the cookie to start reading at
      *     out: the cookie of the last entry copied into the buffer
      * cred, p, stuff - passed through to nfscl_request() and the reply
      *     parsing functions
      * nap, attrflagp - handed to nfscl_postop_attr() to receive the
      *     directory attributes for NFSv3 and NFSv4; *attrflagp is cleared
      *     before each request
      * eofp - when not NULL, set as described near the end of the function
      * Returns 0 or an error: the one from nfscl_request(), a reply parsing
      * error or the server's reply status (nd_repstat).
2909  */
2910 int
2911 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
2912     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
2913     int *eofp, void *stuff)
2914 {
2915         int len, left;
2916         struct dirent *dp = NULL;
2917         u_int32_t *tl;
2918         nfsquad_t cookie, ncookie;
2919         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2920         struct nfsnode *dnp = VTONFS(vp);
2921         struct nfsvattr nfsva;
2922         struct nfsrv_descript nfsd, *nd = &nfsd;
2923         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2924         int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
2925         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
2926         char *cp;
2927         nfsattrbit_t attrbits, dattrbits;
2928         u_int32_t rderr, *tl2 = NULL;
2929         size_t tresid;
2930
2931         KASSERT(uiop->uio_iovcnt == 1 &&
2932             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
2933             ("nfs readdirrpc bad uio"));
2934         ncookie.lval[0] = ncookie.lval[1] = 0;
2935         /*
2936          * There is no point in reading a lot more than uio_resid, however
2937          * adding one additional DIRBLKSIZ makes sense. Since uio_resid
2938          * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
2939          * will never make readsize > nm_readdirsize.
2940          */
2941         readsize = nmp->nm_readdirsize;
2942         if (readsize > uiop->uio_resid)
2943                 readsize = uiop->uio_resid + DIRBLKSIZ;
2944
2945         *attrflagp = 0;
2946         if (eofp)
2947                 *eofp = 0;
             /* Remember the starting resid so "nothing returned" can be detected. */
2948         tresid = uiop->uio_resid;
2949         cookie.lval[0] = cookiep->nfsuquad[0];
2950         cookie.lval[1] = cookiep->nfsuquad[1];
2951         nd->nd_mrep = NULL;
2952
2953         /*
2954          * For NFSv4, first create the "." and ".." entries.
2955          */
2956         if (NFSHASNFSV4(nmp)) {
2957                 reqsize = 6 * NFSX_UNSIGNED;
2958                 NFSGETATTR_ATTRBIT(&dattrbits);
2959                 NFSZERO_ATTRBIT(&attrbits);
2960                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
2961                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
2962                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
2963                     NFSATTRBIT_MOUNTEDONFILEID)) {
2964                         NFSSETBIT_ATTRBIT(&attrbits,
2965                             NFSATTRBIT_MOUNTEDONFILEID);
2966                         gotmnton = 1;
2967                 } else {
2968                         /*
2969                          * Must fake it. Use the fileno, except when the
2970                          * fsid is != to that of the directory. For that
2971                          * case, generate a fake fileno that is not the same.
2972                          */
2973                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
2974                         gotmnton = 0;
2975                 }
2976
2977                 /*
2978                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
2979                  */
2980                 if (uiop->uio_offset == 0) {
2981                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
2982                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2983                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2984                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2985                         (void) nfsrv_putattrbit(nd, &attrbits);
2986                         error = nfscl_request(nd, vp, p, cred, stuff);
2987                         if (error)
2988                             return (error);
2989                         dotfileid = 0;  /* Fake out the compiler. */
2990                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
2991                             error = nfsm_loadattr(nd, &nfsva);
2992                             if (error != 0)
2993                                 goto nfsmout;
2994                             dotfileid = nfsva.na_fileid;
2995                         }
2996                         if (nd->nd_repstat == 0) {
                                 /*
                                  * The 5th word is used as the file handle
                                  * length; the handle itself is stepped over,
                                  * not saved.
                                  */
2997                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2998                             len = fxdr_unsigned(int, *(tl + 4));
2999                             if (len > 0 && len <= NFSX_V4FHMAX)
3000                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3001                             else
3002                                 error = EPERM;
3003                             if (!error) {
3004                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3005                                 nfsva.na_mntonfileno = UINT64_MAX;
3006                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3007                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3008                                     NULL, NULL, NULL, p, cred);
3009                                 if (error) {
3010                                     dotdotfileid = dotfileid;
3011                                 } else if (gotmnton) {
3012                                     if (nfsva.na_mntonfileno != UINT64_MAX)
3013                                         dotdotfileid = nfsva.na_mntonfileno;
3014                                     else
3015                                         dotdotfileid = nfsva.na_fileid;
3016                                 } else if (nfsva.na_filesid[0] ==
3017                                     dnp->n_vattr.na_filesid[0] &&
3018                                     nfsva.na_filesid[1] ==
3019                                     dnp->n_vattr.na_filesid[1]) {
3020                                     dotdotfileid = nfsva.na_fileid;
3021                                 } else {
3022                                     do {
3023                                         fakefileno--;
3024                                     } while (fakefileno ==
3025                                         nfsva.na_fileid);
3026                                     dotdotfileid = fakefileno;
3027                                 }
3028                             }
3029                         } else if (nd->nd_repstat == NFSERR_NOENT) {
3030                             /*
3031                              * Lookupp returns NFSERR_NOENT when we are
3032                              * at the root, so just use the current dir.
3033                              */
3034                             nd->nd_repstat = 0;
3035                             dotdotfileid = dotfileid;
3036                         } else {
3037                             error = nd->nd_repstat;
3038                         }
3039                         m_freem(nd->nd_mrep);
3040                         if (error)
3041                             return (error);
3042                         nd->nd_mrep = NULL;
                             /* Hand craft the "." entry at the start of the buffer. */
3043                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3044                         dp->d_pad0 = dp->d_pad1 = 0;
3045                         dp->d_off = 0;
3046                         dp->d_type = DT_DIR;
3047                         dp->d_fileno = dotfileid;
3048                         dp->d_namlen = 1;
3049                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
3050                         dp->d_name[0] = '.';
3051                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3052                         /*
3053                          * Just make these offset cookie 0.
3054                          */
3055                         tl = (u_int32_t *)&dp->d_name[8];
3056                         *tl++ = 0;
3057                         *tl = 0;
3058                         blksiz += dp->d_reclen;
3059                         uiop->uio_resid -= dp->d_reclen;
3060                         uiop->uio_offset += dp->d_reclen;
3061                         uiop->uio_iov->iov_base =
3062                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3063                         uiop->uio_iov->iov_len -= dp->d_reclen;
                             /* And the ".." entry directly after it. */
3064                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3065                         dp->d_pad0 = dp->d_pad1 = 0;
3066                         dp->d_off = 0;
3067                         dp->d_type = DT_DIR;
3068                         dp->d_fileno = dotdotfileid;
3069                         dp->d_namlen = 2;
3070                         *((uint64_t *)dp->d_name) = 0;
3071                         dp->d_name[0] = '.';
3072                         dp->d_name[1] = '.';
3073                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3074                         /*
3075                          * Just make these offset cookie 0.
3076                          */
3077                         tl = (u_int32_t *)&dp->d_name[8];
3078                         *tl++ = 0;
3079                         *tl = 0;
3080                         blksiz += dp->d_reclen;
3081                         uiop->uio_resid -= dp->d_reclen;
3082                         uiop->uio_offset += dp->d_reclen;
3083                         uiop->uio_iov->iov_base =
3084                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3085                         uiop->uio_iov->iov_len -= dp->d_reclen;
3086                 }
3087                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
3088         } else {
3089                 reqsize = 5 * NFSX_UNSIGNED;
3090         }
3091
3092
3093         /*
3094          * Loop around doing readdir rpc's of size readsize.
3095          * The stopping criteria is EOF or buffer full.
3096          */
3097         while (more_dirs && bigenough) {
3098                 *attrflagp = 0;
3099                 NFSCL_REQSTART(nd, NFSPROC_READDIR, vp);
3100                 if (nd->nd_flag & ND_NFSV2) {
3101                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3102                         *tl++ = cookie.lval[1];
3103                         *tl = txdr_unsigned(readsize);
3104                 } else {
3105                         NFSM_BUILD(tl, u_int32_t *, reqsize);
3106                         *tl++ = cookie.lval[0];
3107                         *tl++ = cookie.lval[1];
                             /*
                              * A zero cookie is sent with a zero cookie
                              * verifier; otherwise the verifier saved in the
                              * nfsnode from an earlier reply is sent.
                              */
3108                         if (cookie.qval == 0) {
3109                                 *tl++ = 0;
3110                                 *tl++ = 0;
3111                         } else {
3112                                 NFSLOCKNODE(dnp);
3113                                 *tl++ = dnp->n_cookieverf.nfsuquad[0];
3114                                 *tl++ = dnp->n_cookieverf.nfsuquad[1];
3115                                 NFSUNLOCKNODE(dnp);
3116                         }
3117                         if (nd->nd_flag & ND_NFSV4) {
3118                                 *tl++ = txdr_unsigned(readsize);
3119                                 *tl = txdr_unsigned(readsize);
3120                                 (void) nfsrv_putattrbit(nd, &attrbits);
3121                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3122                                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3123                                 (void) nfsrv_putattrbit(nd, &dattrbits);
3124                         } else {
3125                                 *tl = txdr_unsigned(readsize);
3126                         }
3127                 }
3128                 error = nfscl_request(nd, vp, p, cred, stuff);
3129                 if (error)
3130                         return (error);
3131                 if (!(nd->nd_flag & ND_NFSV2)) {
3132                         if (nd->nd_flag & ND_NFSV3)
3133                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3134                                     stuff);
3135                         if (!nd->nd_repstat && !error) {
                                     /* Save the cookie verifier from this reply. */
3136                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
3137                                 NFSLOCKNODE(dnp);
3138                                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3139                                 dnp->n_cookieverf.nfsuquad[1] = *tl;
3140                                 NFSUNLOCKNODE(dnp);
3141                         }
3142                 }
3143                 if (nd->nd_repstat || error) {
3144                         if (!error)
3145                                 error = nd->nd_repstat;
3146                         goto nfsmout;
3147                 }
3148                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3149                 more_dirs = fxdr_unsigned(int, *tl);
                     /* A reply without any entries ends the outer loop as well. */
3150                 if (!more_dirs)
3151                         tryformoredirs = 0;
3152         
3153                 /* loop through the dir entries, doctoring them to 4bsd form */
3154                 while (more_dirs && bigenough) {
3155                         if (nd->nd_flag & ND_NFSV4) {
3156                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3157                                 ncookie.lval[0] = *tl++;
3158                                 ncookie.lval[1] = *tl++;
3159                                 len = fxdr_unsigned(int, *tl);
3160                         } else if (nd->nd_flag & ND_NFSV3) {
3161                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3162                                 nfsva.na_fileid = fxdr_hyper(tl);
3163                                 tl += 2;
3164                                 len = fxdr_unsigned(int, *tl);
3165                         } else {
3166                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3167                                 nfsva.na_fileid = fxdr_unsigned(uint64_t,
3168                                     *tl++);
3169                                 len = fxdr_unsigned(int, *tl);
3170                         }
3171                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3172                                 error = EBADRPC;
3173                                 goto nfsmout;
3174                         }
3175                         tlen = roundup2(len, 8);
3176                         if (tlen == len)
3177                                 tlen += 8;  /* To ensure null termination. */
                             /*
                              * If this entry will not fit in what is left of
                              * the current DIRBLKSIZ block, zero the rest of
                              * the block, add it to the previous record's
                              * d_reclen and start a new block.
                              */
3178                         left = DIRBLKSIZ - blksiz;
3179                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3180                                 NFSBZERO(uiop->uio_iov->iov_base, left);
3181                                 dp->d_reclen += left;
3182                                 uiop->uio_iov->iov_base =
3183                                     (char *)uiop->uio_iov->iov_base + left;
3184                                 uiop->uio_iov->iov_len -= left;
3185                                 uiop->uio_resid -= left;
3186                                 uiop->uio_offset += left;
3187                                 blksiz = 0;
3188                         }
                             /*
                              * Out of room in the caller's buffer: the name is
                              * skipped below and both loops end once the rest
                              * of this entry has been parsed.
                              */
3189                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3190                             uiop->uio_resid)
3191                                 bigenough = 0;
3192                         if (bigenough) {
3193                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3194                                 dp->d_pad0 = dp->d_pad1 = 0;
3195                                 dp->d_off = 0;
3196                                 dp->d_namlen = len;
3197                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
3198                                     NFSX_HYPER;
3199                                 dp->d_type = DT_UNKNOWN;
3200                                 blksiz += dp->d_reclen;
3201                                 if (blksiz == DIRBLKSIZ)
3202                                         blksiz = 0;
                                     /*
                                      * Step over the fixed dirent header so
                                      * that nfsm_mbufuio() is handed the uio
                                      * positioned at d_name.
                                      */
3203                                 uiop->uio_resid -= DIRHDSIZ;
3204                                 uiop->uio_offset += DIRHDSIZ;
3205                                 uiop->uio_iov->iov_base =
3206                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3207                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
3208                                 error = nfsm_mbufuio(nd, uiop, len);
3209                                 if (error)
3210                                         goto nfsmout;
3211                                 cp = uiop->uio_iov->iov_base;
3212                                 tlen -= len;
3213                                 NFSBZERO(cp, tlen);
3214                                 cp += tlen;     /* points to cookie storage */
3215                                 tl2 = (u_int32_t *)cp;
3216                                 uiop->uio_iov->iov_base =
3217                                     (char *)uiop->uio_iov->iov_base + tlen +
3218                                     NFSX_HYPER;
3219                                 uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3220                                 uiop->uio_resid -= tlen + NFSX_HYPER;
3221                                 uiop->uio_offset += (tlen + NFSX_HYPER);
3222                         } else {
3223                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3224                                 if (error)
3225                                         goto nfsmout;
3226                         }
3227                         if (nd->nd_flag & ND_NFSV4) {
3228                                 rderr = 0;
3229                                 nfsva.na_mntonfileno = UINT64_MAX;
3230                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3231                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3232                                     NULL, NULL, &rderr, p, cred);
3233                                 if (error)
3234                                         goto nfsmout;
3235                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3236                         } else if (nd->nd_flag & ND_NFSV3) {
3237                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3238                                 ncookie.lval[0] = *tl++;
3239                                 ncookie.lval[1] = *tl++;
3240                         } else {
3241                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3242                                 ncookie.lval[0] = 0;
3243                                 ncookie.lval[1] = *tl++;
3244                         }
3245                         if (bigenough) {
3246                             if (nd->nd_flag & ND_NFSV4) {
                                     /*
                                      * rderr is filled in by nfsv4_loadattr();
                                      * presumably the entry's rdattr_error
                                      * value - TODO confirm.
                                      */
3247                                 if (rderr) {
3248                                     dp->d_fileno = 0;
3249                                 } else {
3250                                     if (gotmnton) {
3251                                         if (nfsva.na_mntonfileno != UINT64_MAX)
3252                                             dp->d_fileno = nfsva.na_mntonfileno;
3253                                         else
3254                                             dp->d_fileno = nfsva.na_fileid;
3255                                     } else if (nfsva.na_filesid[0] ==
3256                                         dnp->n_vattr.na_filesid[0] &&
3257                                         nfsva.na_filesid[1] ==
3258                                         dnp->n_vattr.na_filesid[1]) {
3259                                         dp->d_fileno = nfsva.na_fileid;
3260                                     } else {
3261                                         do {
3262                                             fakefileno--;
3263                                         } while (fakefileno ==
3264                                             nfsva.na_fileid);
3265                                         dp->d_fileno = fakefileno;
3266                                     }
3267                                     dp->d_type = vtonfs_dtype(nfsva.na_type);
3268                                 }
3269                             } else {
3270                                 dp->d_fileno = nfsva.na_fileid;
3271                             }
                                 /*
                                  * Store this entry's cookie after its name and
                                  * remember it for the caller (*cookiep) and
                                  * for the next request (cookie).
                                  */
3272                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3273                                 ncookie.lval[0];
3274                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3275                                 ncookie.lval[1];
3276                         }
                             /* tl is at the last word dissected above. */
3277                         more_dirs = fxdr_unsigned(int, *tl);
3278                 }
3279                 /*
3280                  * If at end of rpc data, get the eof boolean
3281                  */
3282                 if (!more_dirs) {
3283                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3284                         eof = fxdr_unsigned(int, *tl);
                             /*
                              * This reply had entries: do another rpc unless
                              * the server says this is the end.
                              */
3285                         if (tryformoredirs)
3286                                 more_dirs = !eof;
3287                         if (nd->nd_flag & ND_NFSV4) {
3288                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3289                                     stuff);
3290                                 if (error)
3291                                         goto nfsmout;
3292                         }
3293                 }
3294                 m_freem(nd->nd_mrep);
3295                 nd->nd_mrep = NULL;
3296         }
3297         /*
3298          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3299          * by increasing d_reclen for the last record.
3300          */
3301         if (blksiz > 0) {
3302                 left = DIRBLKSIZ - blksiz;
3303                 NFSBZERO(uiop->uio_iov->iov_base, left);
3304                 dp->d_reclen += left;
3305                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3306                     left;
3307                 uiop->uio_iov->iov_len -= left;
3308                 uiop->uio_resid -= left;
3309                 uiop->uio_offset += left;
3310         }
3311
3312         /*
3313          * If returning no data, assume end of file.
3314          * If not bigenough, return not end of file, since you aren't
3315          *    returning all the data
3316          * Otherwise, return the eof flag from the server.
3317          */
3318         if (eofp) {
3319                 if (tresid == ((size_t)(uiop->uio_resid)))
3320                         *eofp = 1;
3321                 else if (!bigenough)
3322                         *eofp = 0;
3323                 else
3324                         *eofp = eof;
3325         }
3326
3327         /*
3328          * Add extra empty records to any remaining DIRBLKSIZ chunks.
3329          */
3330         while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
3331                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3332                 NFSBZERO(dp, DIRBLKSIZ);
3333                 dp->d_type = DT_UNKNOWN;
                     /* Each empty record carries the last cookie that was stored. */
3334                 tl = (u_int32_t *)&dp->d_name[4];
3335                 *tl++ = cookie.lval[0];
3336                 *tl = cookie.lval[1];
3337                 dp->d_reclen = DIRBLKSIZ;
3338                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3339                     DIRBLKSIZ;
3340                 uiop->uio_iov->iov_len -= DIRBLKSIZ;
3341                 uiop->uio_resid -= DIRBLKSIZ;
3342                 uiop->uio_offset += DIRBLKSIZ;
3343         }
3344
3345 nfsmout:
3346         if (nd->nd_mrep != NULL)
3347                 m_freem(nd->nd_mrep);
3348         return (error);
3349 }
3350
3351 #ifndef APPLE
3352 /*
3353  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3354  * (Also used for NFS V4 when mount flag set.)
3355  * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
      *
      * Reads entries of the directory vp with READDIRPLUS RPCs and stores them
      * in uiop as struct dirent records. Each record reserves NFSX_HYPER bytes
      * after the zero-padded name for that entry's directory offset cookie,
      * and the last record of a DIRBLKSIZ block has its d_reclen stretched so
      * that the block is completely filled.
      * For entries that come back with a file handle (and, for V3, with
      * attributes) the attribute cache is loaded and, subject to the checks
      * in the loop, a name cache entry is made.
      *
      * vp        - directory vnode being read.
      * uiop      - destination; must have exactly one iovec and a resid that
      *             is a multiple of DIRBLKSIZ (asserted below).
      * cookiep   - in: cookie to start reading from; out: cookie of the last
      *             entry copied into uiop.
      * cred, p, stuff - passed through to the RPC and helper calls below.
      * nap, attrflagp - filled in by nfscl_postop_attr(); *attrflagp is
      *             cleared on entry and before each READDIRPLUS request.
      * eofp      - optional (may be NULL); receives the end-of-directory
      *             indication computed near the bottom of the function.
      *
      * Returns 0 on success, otherwise an error from one of the calls below,
      * a non-zero nd_repstat from the reply, EBADRPC for an entry with a bad
      * name length, or EPERM for a bad file handle length in the V4 ".."
      * lookup.
3356  */
3357 int
3358 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3359     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3360     int *eofp, void *stuff)
3361 {
3362         int len, left;
3363         struct dirent *dp = NULL;
3364         u_int32_t *tl;
3365         vnode_t newvp = NULLVP;
3366         struct nfsrv_descript nfsd, *nd = &nfsd;
3367         struct nameidata nami, *ndp = &nami;
3368         struct componentname *cnp = &ndp->ni_cnd;
3369         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3370         struct nfsnode *dnp = VTONFS(vp), *np;
3371         struct nfsvattr nfsva;
3372         struct nfsfh *nfhp;
3373         nfsquad_t cookie, ncookie;
3374         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3375         int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3376         int isdotdot = 0, unlocknewvp = 0;
3377         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3378         u_int64_t fileno = 0;
3379         char *cp;
3380         nfsattrbit_t attrbits, dattrbits;
3381         size_t tresid;
3382         u_int32_t *tl2 = NULL, rderr;
3383         struct timespec dctime;
3384
             /* Sanity check uiop: one iovec, resid a multiple of DIRBLKSIZ. */
3385         KASSERT(uiop->uio_iovcnt == 1 &&
3386             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3387             ("nfs readdirplusrpc bad uio"));
3388         ncookie.lval[0] = ncookie.lval[1] = 0;
3389         timespecclear(&dctime);
3390         *attrflagp = 0;
3391         if (eofp != NULL)
3392                 *eofp = 0;
3393         ndp->ni_dvp = vp;
3394         nd->nd_mrep = NULL;
             /*
              * Work on a local copy of the caller's cookie. tresid remembers
              * the starting resid so that "nothing was returned" can be
              * detected at the end.
              */
3395         cookie.lval[0] = cookiep->nfsuquad[0];
3396         cookie.lval[1] = cookiep->nfsuquad[1];
3397         tresid = uiop->uio_resid;
3398
3399         /*
3400          * For NFSv4, first create the "." and ".." entries.
3401          */
3402         if (NFSHASNFSV4(nmp)) {
3403                 NFSGETATTR_ATTRBIT(&dattrbits);
3404                 NFSZERO_ATTRBIT(&attrbits);
3405                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3406                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3407                     NFSATTRBIT_MOUNTEDONFILEID)) {
3408                         NFSSETBIT_ATTRBIT(&attrbits,
3409                             NFSATTRBIT_MOUNTEDONFILEID);
3410                         gotmnton = 1;
3411                 } else {
3412                         /*
3413                          * Must fake it. Use the fileno, except when the
3414                          * fsid is != to that of the directory. For that
3415                          * case, generate a fake fileno that is not the same.
3416                          */
3417                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3418                         gotmnton = 0;
3419                 }
3420
3421                 /*
3422                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3423                  */
3424                 if (uiop->uio_offset == 0) {
                             /*
                              * Build a LOOKUPP request followed by GETFH and
                              * GETATTR (with the attrbits chosen above) ops.
                              */
3425                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
3426                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3427                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
3428                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3429                         (void) nfsrv_putattrbit(nd, &attrbits);
3430                         error = nfscl_request(nd, vp, p, cred, stuff);
3431                         if (error)
3432                             return (error);
3433                         dotfileid = 0;  /* Fake out the compiler. */
                             /*
                              * Load the first attribute set in the reply, if
                              * there is any data; its fileid is used for "."
                              * and its ctime becomes dctime.
                              */
3434                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3435                             error = nfsm_loadattr(nd, &nfsva);
3436                             if (error != 0)
3437                                 goto nfsmout;
3438                             dctime = nfsva.na_ctime;
3439                             dotfileid = nfsva.na_fileid;
3440                         }
3441                         if (nd->nd_repstat == 0) {
                                 /*
                                  * Step over five words, the last of which is
                                  * a file handle length (presumably the GETFH
                                  * result), then over the handle itself.
                                  */
3442                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3443                             len = fxdr_unsigned(int, *(tl + 4));
3444                             if (len > 0 && len <= NFSX_V4FHMAX)
3445                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3446                             else
3447                                 error = EPERM;
3448                             if (!error) {
3449                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3450                                 nfsva.na_mntonfileno = UINT64_MAX;
3451                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3452                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3453                                     NULL, NULL, NULL, p, cred);
                                     /*
                                      * Choose the fileid for "..".
                                      * NOTE(review): in the error case below,
                                      * error is still set when it is tested
                                      * after m_freem(), so the function returns
                                      * and that assignment appears to have no
                                      * effect - confirm.
                                      */
3454                                 if (error) {
3455                                     dotdotfileid = dotfileid;
3456                                 } else if (gotmnton) {
3457                                     if (nfsva.na_mntonfileno != UINT64_MAX)
3458                                         dotdotfileid = nfsva.na_mntonfileno;
3459                                     else
3460                                         dotdotfileid = nfsva.na_fileid;
3461                                 } else if (nfsva.na_filesid[0] ==
3462                                     dnp->n_vattr.na_filesid[0] &&
3463                                     nfsva.na_filesid[1] ==
3464                                     dnp->n_vattr.na_filesid[1]) {
3465                                     dotdotfileid = nfsva.na_fileid;
3466                                 } else {
3467                                     do {
3468                                         fakefileno--;
3469                                     } while (fakefileno ==
3470                                         nfsva.na_fileid);
3471                                     dotdotfileid = fakefileno;
3472                                 }
3473                             }
3474                         } else if (nd->nd_repstat == NFSERR_NOENT) {
3475                             /*
3476                              * Lookupp returns NFSERR_NOENT when we are
3477                              * at the root, so just use the current dir.
3478                              */
3479                             nd->nd_repstat = 0;
3480                             dotdotfileid = dotfileid;
3481                         } else {
3482                             error = nd->nd_repstat;
3483                         }
3484                         m_freem(nd->nd_mrep);
3485                         if (error)
3486                             return (error);
3487                         nd->nd_mrep = NULL;
                             /* Hand build the "." record at the uio position. */
3488                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3489                         dp->d_pad0 = dp->d_pad1 = 0;
3490                         dp->d_off = 0;
3491                         dp->d_type = DT_DIR;
3492                         dp->d_fileno = dotfileid;
3493                         dp->d_namlen = 1;
3494                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
3495                         dp->d_name[0] = '.';
3496                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3497                         /*
3498                          * Just make these offset cookie 0.
3499                          */
3500                         tl = (u_int32_t *)&dp->d_name[8];
3501                         *tl++ = 0;
3502                         *tl = 0;
3503                         blksiz += dp->d_reclen;
3504                         uiop->uio_resid -= dp->d_reclen;
3505                         uiop->uio_offset += dp->d_reclen;
3506                         uiop->uio_iov->iov_base =
3507                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3508                         uiop->uio_iov->iov_len -= dp->d_reclen;
                             /* Same again for the ".." record. */
3509                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3510                         dp->d_pad0 = dp->d_pad1 = 0;
3511                         dp->d_off = 0;
3512                         dp->d_type = DT_DIR;
3513                         dp->d_fileno = dotdotfileid;
3514                         dp->d_namlen = 2;
3515                         *((uint64_t *)dp->d_name) = 0;
3516                         dp->d_name[0] = '.';
3517                         dp->d_name[1] = '.';
3518                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3519                         /*
3520                          * Just make these offset cookie 0.
3521                          */
3522                         tl = (u_int32_t *)&dp->d_name[8];
3523                         *tl++ = 0;
3524                         *tl = 0;
3525                         blksiz += dp->d_reclen;
3526                         uiop->uio_resid -= dp->d_reclen;
3527                         uiop->uio_offset += dp->d_reclen;
3528                         uiop->uio_iov->iov_base =
3529                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3530                         uiop->uio_iov->iov_len -= dp->d_reclen;
3531                 }
                     /*
                      * From here on attrbits holds the per-entry attributes
                      * requested in the READDIRPLUS requests below.
                      */
3532                 NFSREADDIRPLUS_ATTRBIT(&attrbits);
3533                 if (gotmnton)
3534                         NFSSETBIT_ATTRBIT(&attrbits,
3535                             NFSATTRBIT_MOUNTEDONFILEID);
3536         }
3537
3538         /*
3539          * Loop around doing readdir rpc's of size nm_readdirsize.
3540          * The stopping criteria is EOF or buffer full.
3541          */
3542         while (more_dirs && bigenough) {
3543                 *attrflagp = 0;
3544                 NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp);
3545                 NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
                     /*
                      * Request arguments: the cookie, the cookie verifier
                      * (zero when starting from cookie 0, otherwise the one
                      * saved in the nfsnode) and two counts, both set to
                      * nm_readdirsize.
                      */
3546                 *tl++ = cookie.lval[0];
3547                 *tl++ = cookie.lval[1];
3548                 if (cookie.qval == 0) {
3549                         *tl++ = 0;
3550                         *tl++ = 0;
3551                 } else {
3552                         NFSLOCKNODE(dnp);
3553                         *tl++ = dnp->n_cookieverf.nfsuquad[0];
3554                         *tl++ = dnp->n_cookieverf.nfsuquad[1];
3555                         NFSUNLOCKNODE(dnp);
3556                 }
3557                 *tl++ = txdr_unsigned(nmp->nm_readdirsize);
3558                 *tl = txdr_unsigned(nmp->nm_readdirsize);
3559                 if (nd->nd_flag & ND_NFSV4) {
                             /*
                              * V4 only: the per-entry attribute bits, then a
                              * GETATTR op using dattrbits.
                              */
3560                         (void) nfsrv_putattrbit(nd, &attrbits);
3561                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3562                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3563                         (void) nfsrv_putattrbit(nd, &dattrbits);
3564                 }
3565                 error = nfscl_request(nd, vp, p, cred, stuff);
3566                 if (error)
3567                         return (error);
3568                 if (nd->nd_flag & ND_NFSV3)
3569                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3570                 if (nd->nd_repstat || error) {
3571                         if (!error)
3572                                 error = nd->nd_repstat;
3573                         goto nfsmout;
3574                 }
3575                 if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3576                         dctime = nap->na_ctime;
                     /*
                      * Save the returned cookie verifier in the nfsnode; the
                      * third word tells whether a first entry follows.
                      */
3577                 NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3578                 NFSLOCKNODE(dnp);
3579                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3580                 dnp->n_cookieverf.nfsuquad[1] = *tl++;
3581                 NFSUNLOCKNODE(dnp);
3582                 more_dirs = fxdr_unsigned(int, *tl);
3583                 if (!more_dirs)
3584                         tryformoredirs = 0;
3585         
3586                 /* loop through the dir entries, doctoring them to 4bsd form */
3587                 while (more_dirs && bigenough) {
                             /*
                              * V4 entries start with the cookie, V3 entries
                              * with the fileid; the third word is the name
                              * length in both cases.
                              */
3588                         NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3589                         if (nd->nd_flag & ND_NFSV4) {
3590                                 ncookie.lval[0] = *tl++;
3591                                 ncookie.lval[1] = *tl++;
3592                         } else {
3593                                 fileno = fxdr_hyper(tl);
3594                                 tl += 2;
3595                         }
3596                         len = fxdr_unsigned(int, *tl);
3597                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3598                                 error = EBADRPC;
3599                                 goto nfsmout;
3600                         }
3601                         tlen = roundup2(len, 8);
3602                         if (tlen == len)
3603                                 tlen += 8;  /* To ensure null termination. */
3604                         left = DIRBLKSIZ - blksiz;
                             /*
                              * The entry does not fit in what is left of the
                              * current DIRBLKSIZ block: stretch the previous
                              * record to the block boundary and start a new
                              * block.
                              */
3605                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3606                                 NFSBZERO(uiop->uio_iov->iov_base, left);
3607                                 dp->d_reclen += left;
3608                                 uiop->uio_iov->iov_base =
3609                                     (char *)uiop->uio_iov->iov_base + left;
3610                                 uiop->uio_iov->iov_len -= left;
3611                                 uiop->uio_resid -= left;
3612                                 uiop->uio_offset += left;
3613                                 blksiz = 0;
3614                         }
                             /*
                              * No room left in the caller's buffer: stop
                              * copying. The rest of this entry is still
                              * parsed below so the reply is consumed in step.
                              */
3615                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3616                             uiop->uio_resid)
3617                                 bigenough = 0;
3618                         if (bigenough) {
3619                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3620                                 dp->d_pad0 = dp->d_pad1 = 0;
3621                                 dp->d_off = 0;
3622                                 dp->d_namlen = len;
3623                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
3624                                     NFSX_HYPER;
3625                                 dp->d_type = DT_UNKNOWN;
3626                                 blksiz += dp->d_reclen;
3627                                 if (blksiz == DIRBLKSIZ)
3628                                         blksiz = 0;
                                     /*
                                      * Advance the uio by DIRHDSIZ (presumably
                                      * the offset of d_name - confirm) so that
                                      * nfsm_mbufuio() copies the name to where
                                      * cn_nameptr points.
                                      */
3629                                 uiop->uio_resid -= DIRHDSIZ;
3630                                 uiop->uio_offset += DIRHDSIZ;
3631                                 uiop->uio_iov->iov_base =
3632                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3633                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
3634                                 cnp->cn_nameptr = uiop->uio_iov->iov_base;
3635                                 cnp->cn_namelen = len;
3636                                 NFSCNHASHZERO(cnp);
3637                                 error = nfsm_mbufuio(nd, uiop, len);
3638                                 if (error)
3639                                         goto nfsmout;
                                     /*
                                      * Zero the pad after the name. The cookie
                                      * slot starts right after the pad and is
                                      * filled in through tl2 once the cookie
                                      * is known.
                                      */
3640                                 cp = uiop->uio_iov->iov_base;
3641                                 tlen -= len;
3642                                 NFSBZERO(cp, tlen);
3643                                 cp += tlen;     /* points to cookie storage */
3644                                 tl2 = (u_int32_t *)cp;
3645                                 if (len == 2 && cnp->cn_nameptr[0] == '.' &&
3646                                     cnp->cn_nameptr[1] == '.')
3647                                         isdotdot = 1;
3648                                 else
3649                                         isdotdot = 0;
3650                                 uiop->uio_iov->iov_base =
3651                                     (char *)uiop->uio_iov->iov_base + tlen +
3652                                     NFSX_HYPER;
3653                                 uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3654                                 uiop->uio_resid -= tlen + NFSX_HYPER;
3655                                 uiop->uio_offset += (tlen + NFSX_HYPER);
3656                         } else {
3657                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3658                                 if (error)
3659                                         goto nfsmout;
3660                         }
                             /*
                              * Rest of the entry: V3 carries the cookie, an
                              * optional attribute set and an optional file
                              * handle; V4 carries an attribute set parsed by
                              * nfsv4_loadattr(), which may also hand back a
                              * file handle in nfhp.
                              */
3661                         nfhp = NULL;
3662                         if (nd->nd_flag & ND_NFSV3) {
3663                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3664                                 ncookie.lval[0] = *tl++;
3665                                 ncookie.lval[1] = *tl++;
3666                                 attrflag = fxdr_unsigned(int, *tl);
3667                                 if (attrflag) {
3668                                   error = nfsm_loadattr(nd, &nfsva);
3669                                   if (error)
3670                                         goto nfsmout;
3671                                 }
3672                                 NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
3673                                 if (*tl) {
3674                                         error = nfsm_getfh(nd, &nfhp);
3675                                         if (error)
3676                                             goto nfsmout;
3677                                 }
                                     /*
                                      * The handle is only used together with
                                      * nfsva below, so drop it when no
                                      * attributes came with the entry.
                                      */
3678                                 if (!attrflag && nfhp != NULL) {
3679                                         free(nfhp, M_NFSFH);
3680                                         nfhp = NULL;
3681                                 }
3682                         } else {
3683                                 rderr = 0;
                                     /*
                                      * NOTE(review): the ".." code near the top
                                      * of this function uses UINT64_MAX as the
                                      * "not returned" marker for
                                      * na_mntonfileno, while this loop uses
                                      * 0xffffffff. If the field is 64 bits
                                      * wide, a real mounted-on fileid of
                                      * 0xffffffff would be ignored here -
                                      * confirm whether the two should agree.
                                      */
3684                                 nfsva.na_mntonfileno = 0xffffffff;
3685                                 error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
3686                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3687                                     NULL, NULL, &rderr, p, cred);
3688                                 if (error)
3689                                         goto nfsmout;
3690                         }
3691
                             /*
                              * If the entry was copied, finish d_fileno and
                              * the cookie, then try to load the caches.
                              */
3692                         if (bigenough) {
3693                             if (nd->nd_flag & ND_NFSV4) {
3694                                 if (rderr) {
3695                                     dp->d_fileno = 0;
3696                                 } else if (gotmnton) {
3697                                     if (nfsva.na_mntonfileno != 0xffffffff)
3698                                         dp->d_fileno = nfsva.na_mntonfileno;
3699                                     else
3700                                         dp->d_fileno = nfsva.na_fileid;
3701                                 } else if (nfsva.na_filesid[0] ==
3702                                     dnp->n_vattr.na_filesid[0] &&
3703                                     nfsva.na_filesid[1] ==
3704                                     dnp->n_vattr.na_filesid[1]) {
3705                                     dp->d_fileno = nfsva.na_fileid;
3706                                 } else {
3707                                     do {
3708                                         fakefileno--;
3709                                     } while (fakefileno ==
3710                                         nfsva.na_fileid);
3711                                     dp->d_fileno = fakefileno;
3712                                 }
3713                             } else {
3714                                 dp->d_fileno = fileno;
3715                             }
                                 /*
                                  * Store the entry's cookie in the record, in
                                  * the local copy used for the next request
                                  * and in the caller's *cookiep.
                                  */
3716                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3717                                 ncookie.lval[0];
3718                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3719                                 ncookie.lval[1];
3720
3721                             if (nfhp != NULL) {
                                     /*
                                      * First case: the handle is compared with
                                      * the directory's own handle and, on a
                                      * match (presumably what NFSRV_CMPFH
                                      * reports - confirm), vp itself is
                                      * referenced instead of getting a node.
                                      */
3722                                 if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
3723                                     dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
3724                                     VREF(vp);
3725                                     newvp = vp;
3726                                     unlocknewvp = 0;
3727                                     free(nfhp, M_NFSFH);
3728                                     np = dnp;
3729                                 } else if (isdotdot != 0) {
3730                                     /*
3731                                      * Skip doing a nfscl_nget() call for "..".
3732                                      * There's a race between acquiring the nfs
3733                                      * node here and lookups that look for the
3734                                      * directory being read (in the parent).
3735                                      * It would try to get a lock on ".." here,
3736                                      * owning the lock on the directory being
3737                                      * read. Lookup will hold the lock on ".."
3738                                      * and try to acquire the lock on the
3739                                      * directory being read.
3740                                      * If the directory is unlocked/relocked,
3741                                      * then there is a LOR with the buflock
3742                                      * vp is relocked.
3743                                      */
3744                                     free(nfhp, M_NFSFH);
3745                                 } else {
                                         /*
                                          * NOTE(review): when nfscl_nget() fails,
                                          * error stays set and the loop simply
                                          * carries on. It is overwritten by the
                                          * next assignment to error (next entry,
                                          * V4 post-op attributes or next RPC);
                                          * for the last entry of the last V3
                                          * reply there appears to be no such
                                          * assignment, so the function would
                                          * return this error - confirm intent.
                                          */
3746                                     error = nfscl_nget(vp->v_mount, vp,
3747                                       nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE);
3748                                     if (!error) {
3749                                         newvp = NFSTOV(np);
3750                                         unlocknewvp = 1;
3751                                     }
3752                                 }
3753                                 nfhp = NULL;
3754                                 if (newvp != NULLVP) {
3755                                     error = nfscl_loadattrcache(&newvp,
3756                                         &nfsva, NULL, NULL, 0, 0);
3757                                     if (error) {
3758                                         if (unlocknewvp)
3759                                             vput(newvp);
3760                                         else
3761                                             vrele(newvp);
3762                                         goto nfsmout;
3763                                     }
3764                                     dp->d_type =
3765                                         vtonfs_dtype(np->n_vattr.na_type);
3766                                     ndp->ni_vp = newvp;
3767                                     NFSCNHASH(cnp, HASHINIT);
                                         /*
                                          * Enter the name only if it is no
                                          * longer than NCHNAMLEN; a directory
                                          * is entered only when a directory
                                          * ctime (dctime) was obtained.
                                          */
3768                                     if (cnp->cn_namelen <= NCHNAMLEN &&
3769                                         (newvp->v_type != VDIR ||
3770                                          dctime.tv_sec != 0)) {
3771                                         cache_enter_time(ndp->ni_dvp,
3772                                             ndp->ni_vp, cnp,
3773                                             &nfsva.na_ctime,
3774                                             newvp->v_type != VDIR ? NULL :
3775                                             &dctime);
3776                                     }
3777                                     if (unlocknewvp)
3778                                         vput(newvp);
3779                                     else
3780                                         vrele(newvp);
3781                                     newvp = NULLVP;
3782                                 }
3783                             }
3784                         } else if (nfhp != NULL) {
                                 /* Entry was not copied; just release its handle. */
3785                             free(nfhp, M_NFSFH);
3786                         }
                             /* Does another entry follow this one? */
3787                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3788                         more_dirs = fxdr_unsigned(int, *tl);
3789                 }
3790                 /*
3791                  * If at end of rpc data, get the eof boolean
3792                  */
3793                 if (!more_dirs) {
3794                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3795                         eof = fxdr_unsigned(int, *tl);
                             /*
                              * Go around for another RPC unless eof was
                              * returned; tryformoredirs is clear when this
                              * reply held no entries at all.
                              */
3796                         if (tryformoredirs)
3797                                 more_dirs = !eof;
3798                         if (nd->nd_flag & ND_NFSV4) {
3799                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3800                                     stuff);
3801                                 if (error)
3802                                         goto nfsmout;
3803                         }
3804                 }
3805                 m_freem(nd->nd_mrep);
3806                 nd->nd_mrep = NULL;
3807         }
3808         /*
3809          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3810          * by increasing d_reclen for the last record.
3811          */
3812         if (blksiz > 0) {
3813                 left = DIRBLKSIZ - blksiz;
3814                 NFSBZERO(uiop->uio_iov->iov_base, left);
3815                 dp->d_reclen += left;
3816                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3817                     left;
3818                 uiop->uio_iov->iov_len -= left;
3819                 uiop->uio_resid -= left;
3820                 uiop->uio_offset += left;
3821         }
3822
3823         /*
3824          * If returning no data, assume end of file.
3825          * If not bigenough, return not end of file, since you aren't
3826          *    returning all the data
3827          * Otherwise, return the eof flag from the server.
3828          */
3829         if (eofp != NULL) {
3830                 if (tresid == uiop->uio_resid)
3831                         *eofp = 1;
3832                 else if (!bigenough)
3833                         *eofp = 0;
3834                 else
3835                         *eofp = eof;
3836         }
3837
3838         /*
3839          * Add extra empty records to any remaining DIRBLKSIZ chunks.
3840          */
3841         while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
3842                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3843                 NFSBZERO(dp, DIRBLKSIZ);
3844                 dp->d_type = DT_UNKNOWN;
                     /* Stash the last cookie 4 bytes into the empty name. */
3845                 tl = (u_int32_t *)&dp->d_name[4];
3846                 *tl++ = cookie.lval[0];
3847                 *tl = cookie.lval[1];
3848                 dp->d_reclen = DIRBLKSIZ;
3849                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3850                     DIRBLKSIZ;
3851                 uiop->uio_iov->iov_len -= DIRBLKSIZ;
3852                 uiop->uio_resid -= DIRBLKSIZ;
3853                 uiop->uio_offset += DIRBLKSIZ;
3854         }
3855
     /* Common exit: release any reply mbufs that are still held. */
3856 nfsmout:
3857         if (nd->nd_mrep != NULL)
3858                 m_freem(nd->nd_mrep);
3859         return (error);
3860 }
3861 #endif  /* !APPLE */
3862
3863 /*
3864  * Nfs commit rpc
      *
      * Builds a COMMIT request for vp carrying a 64-bit offset and a 32-bit
      * count (for NFSv4 a GETATTR op is appended), sends it and parses the
      * reply. The verifier in the reply is compared with the one stored in
      * the mount (nm_verf); if they differ the stored copy is replaced and
      * NFSERR_STALEWRITEVERF is reported to the caller.
      *
      * vp        - vnode the commit applies to.
      * offset    - starting offset placed in the request.
      * cnt       - byte count placed in the request.
      * cred, p, stuff - passed through to nfscl_request() and the attribute
      *             helpers.
      * nap, attrflagp - handed to nfscl_wcc_data() / nfscl_postop_attr();
      *             *attrflagp is cleared on entry.
      *
      * Returns 0 on success, otherwise an error from one of the calls below
      * or the non-zero nd_repstat of the reply.
3865  */
3866 int
3867 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
3868     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
3869 {
3870         u_int32_t *tl;
3871         struct nfsrv_descript nfsd, *nd = &nfsd;
3872         nfsattrbit_t attrbits;
3873         int error;
3874         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3875         
3876         *attrflagp = 0;
3877         NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp);
             /* Arguments: the 64-bit offset followed by the 32-bit count. */
3878         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3879         txdr_hyper(offset, tl);
3880         tl += 2;
3881         *tl = txdr_unsigned(cnt);
3882         if (nd->nd_flag & ND_NFSV4) {
3883                 /*
3884                  * And do a Getattr op.
3885                  */
3886                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3887                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3888                 NFSGETATTR_ATTRBIT(&attrbits);
3889                 (void) nfsrv_putattrbit(nd, &attrbits);
3890         }
3891         error = nfscl_request(nd, vp, p, cred, stuff);
3892         if (error)
3893                 return (error);
             /*
              * nfscl_wcc_data() consumes the leading part of the reply
              * (presumably the weak cache consistency data - confirm) and
              * may fill in nap/attrflagp.
              */
3894         error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff);
3895         if (!error && !nd->nd_repstat) {
                     /*
                      * Compare the returned verifier with the copy kept in
                      * the mount, under the mount lock. On a mismatch keep
                      * the new one and flag NFSERR_STALEWRITEVERF.
                      */
3896                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
3897                 NFSLOCKMNT(nmp);
3898                 if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
3899                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
3900                         nd->nd_repstat = NFSERR_STALEWRITEVERF;
3901                 }
3902                 NFSUNLOCKMNT(nmp);
3903                 if (nd->nd_flag & ND_NFSV4)
3904                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3905         }
     /*
      * Common exit: a non-zero reply status becomes the return value when no
      * local error occurred, then the reply mbufs are released.
      */
3906 nfsmout:
3907         if (!error && nd->nd_repstat)
3908                 error = nd->nd_repstat;
3909         m_freem(nd->nd_mrep);
3910         return (error);
3911 }
3912
/*
 * NFS byte range lock rpc.
 * (Mostly just calls one of the three lower level RPC routines.)
 *
 * The flock's l_whence/l_start/l_len are converted to an unsigned offset
 * and length (an l_len of 0 is sent as NFS64BITSSET) and then "op" selects
 * the work to do:
 *   F_GETLK  - nfscl_lockt() and, if that returns 0, nfsrpc_lockt()
 *   F_UNLCK  - nfscl_relbytelock()/nfsrpc_locku(), looping until
 *              nfscl_relbytelock() hands back a NULL lock owner
 *   F_SETLK  - nfscl_getbytelock() and then nfsrpc_lock()
 * The whole operation is redone while it ends with NFSERR_GRACE,
 * NFSERR_DELAY, NFSERR_STALECLIENTID, NFSERR_STALESTATEID,
 * NFSERR_STALEDONTRECOVER or NFSERR_BADSESSION, and also for
 * NFSERR_EXPIRED/NFSERR_BADSTATEID while retrycnt is below 4.
 *
 * Returns 0 or an error number.  EINVAL is returned for an unknown
 * l_whence, a negative start, an end that is less than start and for an
 * "op" that is none of the above.
 */
int
nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
    int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
{
        struct nfscllockowner *lp;
        struct nfsclclient *clp;
        struct nfsfh *nfhp;
        struct nfsrv_descript nfsd, *nd = &nfsd;
        struct nfsmount *nmp = VFSTONFS(vp->v_mount);
        u_int64_t off, len;
        off_t start, end;
        u_int32_t clidrev = 0;
        int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
        int callcnt, dorpc;

        /*
         * Convert the flock structure into a start and end and do POSIX
         * bounds checking.
         */
        switch (fl->l_whence) {
        case SEEK_SET:
        case SEEK_CUR:
                /*
                 * Caller is responsible for adding any necessary offset
                 * when SEEK_CUR is used.
                 */
                start = fl->l_start;
                off = fl->l_start;
                break;
        case SEEK_END:
                /* "size" is the base that l_start is relative to here. */
                start = size + fl->l_start;
                off = size + fl->l_start;
                break;
        default:
                return (EINVAL);
        }
        if (start < 0)
                return (EINVAL);
        if (fl->l_len != 0) {
                /* "end" is only computed to reject a range that wraps. */
                end = start + fl->l_len - 1;
                if (end < start)
                        return (EINVAL);
        }

        /* An l_len of 0 goes on the wire as all 64 bits set. */
        len = fl->l_len;
        if (len == 0)
                len = NFS64BITSSET;
        retrycnt = 0;
        do {
            /* Start each attempt with a clean reply status. */
            nd->nd_repstat = 0;
            if (op == F_GETLK) {
                error = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
                if (error)
                        return (error);
                error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
                if (!error) {
                        clidrev = clp->nfsc_clientidrev;
                        error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
                            p, id, flags);
                } else if (error == -1) {
                        /*
                         * A return of -1 from nfscl_lockt() is mapped to
                         * success and no LockT RPC is done.
                         * NOTE(review): presumably the answer was found in
                         * local state - confirm against nfscl_lockt().
                         */
                        error = 0;
                }
                nfscl_clientrelease(clp);
            } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
                /*
                 * We must loop around for all lockowner cases.
                 */
                callcnt = 0;
                error = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
                if (error)
                        return (error);
                do {
                    error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
                        clp, id, flags, &lp, &dorpc);
                    /*
                     * If it returns a NULL lp, we're done.
                     * (The first pass releases just the client reference,
                     * later passes release via nfscl_releasealllocks().
                     * This return bypasses the outer retry loop.)
                     */
                    if (lp == NULL) {
                        if (callcnt == 0)
                            nfscl_clientrelease(clp);
                        else
                            nfscl_releasealllocks(clp, vp, p, id, flags);
                        return (error);
                    }
                    /*
                     * Remember the clientid revision for the
                     * nfscl_hasexpired() call made below on
                     * NFSERR_EXPIRED/NFSERR_BADSTATEID.
                     */
                    if (nmp->nm_clp != NULL)
                        clidrev = nmp->nm_clp->nfsc_clientidrev;
                    else
                        clidrev = 0;
                    /*
                     * If the server doesn't support Posix lock semantics,
                     * only allow locks on the entire file, since it won't
                     * handle overlapping byte ranges.
                     * There might still be a problem when a lock
                     * upgrade/downgrade (read<->write) occurs, since the
                     * server "might" expect an unlock first?
                     */
                    if (dorpc && (lp->nfsl_open->nfso_posixlock ||
                        (off == 0 && len == NFS64BITSSET))) {
                        /*
                         * Since the lock records will go away, we must
                         * wait for grace and delay here.
                         */
                        do {
                            error = nfsrpc_locku(nd, nmp, lp, off, len,
                                NFSV4LOCKT_READ, cred, p, 0);
                            if ((nd->nd_repstat == NFSERR_GRACE ||
                                 nd->nd_repstat == NFSERR_DELAY) &&
                                error == 0)
                                (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
                                    "nfs_advlock");
                        } while ((nd->nd_repstat == NFSERR_GRACE ||
                            nd->nd_repstat == NFSERR_DELAY) && error == 0);
                    }
                    callcnt++;
                /* Stop on the first RPC error or non-zero reply status. */
                } while (error == 0 && nd->nd_repstat == 0);
                nfscl_releasealllocks(clp, vp, p, id, flags);
            } else if (op == F_SETLK) {
                error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
                    NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
                /* On failure, or when "donelocally" is set, no RPC is done. */
                if (error || donelocally) {
                        return (error);
                }
                if (nmp->nm_clp != NULL)
                        clidrev = nmp->nm_clp->nfsc_clientidrev;
                else
                        clidrev = 0;
                nfhp = VTONFS(vp)->n_fhp;
                /*
                 * Without Posix lock semantics on the open, only a lock
                 * covering the entire file is attempted.
                 */
                if (!lp->nfsl_open->nfso_posixlock &&
                    (off != 0 || len != NFS64BITSSET)) {
                        error = EINVAL;
                } else {
                        error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
                            nfhp->nfh_len, lp, newone, reclaim, off,
                            len, fl->l_type, cred, p, 0);
                }
                /* Fold the reply status in before releasing the lock owner. */
                if (!error)
                        error = nd->nd_repstat;
                nfscl_lockrelease(lp, error, newone);
            } else {
                error = EINVAL;
            }
            if (!error)
                error = nd->nd_repstat;
            /*
             * These errors always cause another pass; nfs_catnap() is
             * called first.
             */
            if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
                error == NFSERR_STALEDONTRECOVER ||
                error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
                error == NFSERR_BADSESSION) {
                (void) nfs_catnap(PZERO, error, "nfs_advlock");
            } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
                && clidrev != 0) {
                /*
                 * A non-zero return from nfscl_hasexpired() ends the
                 * retries (see the loop condition); each pass through
                 * here is counted in retrycnt.
                 */
                expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
                retrycnt++;
            }
        } while (error == NFSERR_GRACE ||
            error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
            error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
            error == NFSERR_BADSESSION ||
            ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
             expireret == 0 && clidrev != 0 && retrycnt < 4));
        /* Any error left once retrycnt has reached 4 is reported as EIO. */
        if (error && retrycnt >= 4)
                error = EIO;
        return (error);
}
4080
/*
 * The lower level routine for the LockT case.
 *
 * Builds and sends a LockT request for the range off/len on vp, using
 * the lock type taken from fl->l_type.  The reply status is left in
 * nd->nd_repstat for the caller, except that NFSERR_DENIED is cleared
 * after the conflicting lock has been copied into "fl":
 *   nd_repstat == 0          - fl->l_type is set to F_UNLCK
 *   NFSERR_DENIED            - fl gets the conflicting lock's start,
 *                              length and type, with l_pid set to 0
 *   NFSERR_STALECLIENTID     - nfscl_initiate_recovery(clp) is called
 * Returns the error from the request or from parsing the reply.  The
 * reply mbuf list is freed here unless nfscl_request() itself failed.
 */
int
nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
    struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
    struct ucred *cred, NFSPROC_T *p, void *id, int flags)
{
        u_int32_t *tl;
        int error, type, size;
        uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
        struct nfsnode *np;
        struct nfsmount *nmp;
        struct nfsclsession *tsep;

        nmp = VFSTONFS(vp->v_mount);
        NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp);
        /* Seven words: lock type, offset (2), length (2), clientid (2). */
        NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
        /* Anything other than F_RDLCK is tested as a write lock. */
        if (fl->l_type == F_RDLCK)
                *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
        else
                *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
        txdr_hyper(off, tl);
        tl += 2;
        txdr_hyper(len, tl);
        tl += 2;
        tsep = nfsmnt_mdssession(nmp);
        *tl++ = tsep->nfsess_clientid.lval[0];
        *tl = tsep->nfsess_clientid.lval[1];
        /*
         * The owner string is what nfscl_filllockowner() puts in "own",
         * followed by the file handle of vp.
         */
        nfscl_filllockowner(id, own, flags);
        np = VTONFS(vp);
        NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
            np->n_fhp->nfh_len);
        (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
        error = nfscl_request(nd, vp, p, cred, NULL);
        if (error)
                return (error);
        if (nd->nd_repstat == 0) {
                /* No conflict was reported. */
                fl->l_type = F_UNLCK;
        } else if (nd->nd_repstat == NFSERR_DENIED) {
                /* Report the conflicting lock through "fl" instead. */
                nd->nd_repstat = 0;
                fl->l_whence = SEEK_SET;
                /*
                 * Eight words: offset (2), length (2), type, two words
                 * that are skipped and the length of the owner string.
                 */
                NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
                fl->l_start = fxdr_hyper(tl);
                tl += 2;
                len = fxdr_hyper(tl);
                tl += 2;
                /* All 64 bits set is converted back to an l_len of 0. */
                if (len == NFS64BITSSET)
                        fl->l_len = 0;
                else
                        fl->l_len = len;
                type = fxdr_unsigned(int, *tl++);
                if (type == NFSV4LOCKT_WRITE)
                        fl->l_type = F_WRLCK;
                else
                        fl->l_type = F_RDLCK;
                /*
                 * XXX For now, I have no idea what to do with the
                 * conflicting lock_owner, so I'll just set the pid == 0
                 * and skip over the lock_owner.
                 */
                fl->l_pid = (pid_t)0;
                /*
                 * NOTE(review): the two words stepped over here are
                 * presumably the owner's clientid - confirm against the
                 * NFSv4 lock_owner definition.
                 */
                tl += 2;
                size = fxdr_unsigned(int, *tl);
                if (size < 0 || size > NFSV4_OPAQUELIMIT)
                        error = EBADRPC;
                /* Step past the owner string, rounded up by NFSM_RNDUP(). */
                if (!error)
                        error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
        } else if (nd->nd_repstat == NFSERR_STALECLIENTID)
                nfscl_initiate_recovery(clp);
nfsmout:
        m_freem(nd->nd_mrep);
        return (error);
}
4155
/*
 * Lower level function that performs the LockU RPC.
 *
 * Sends a LockU for the range off/len using the file handle of the open
 * that "lp" hangs off, lp's sequence number and lp's lock stateid.
 * When the reply status is 0, the stateid in the reply replaces
 * lp->nfsl_stateid.  For NFSERR_STALESTATEID, recovery is initiated for
 * the client that owns the open.  A non-zero "syscred" sets
 * ND_USEGSSNAME in nd->nd_flag.
 * Returns the error from the request or from parsing the reply; the
 * reply status is left in nd->nd_repstat for the caller.
 */
static int
nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
    struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
    u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
{
        u_int32_t *tl;
        int error;

        nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
            lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0, false);
        /* Lock type, seqid, stateid, offset (2 words), length (2 words). */
        NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
        *tl++ = txdr_unsigned(type);
        *tl = txdr_unsigned(lp->nfsl_seqid);
        /*
         * When nfstest_outofseq is non-zero, the seqid is randomly
         * replaced by seqid + 1 (about once in nfstest_outofseq calls).
         */
        if (nfstest_outofseq &&
            (arc4random() % nfstest_outofseq) == 0)
                *tl = txdr_unsigned(lp->nfsl_seqid + 1);
        tl++;
        /* The stateid's seqid word is sent as 0 when NFSHASNFSV4N(nmp). */
        if (NFSHASNFSV4N(nmp))
                *tl++ = 0;
        else
                *tl++ = lp->nfsl_stateid.seqid;
        *tl++ = lp->nfsl_stateid.other[0];
        *tl++ = lp->nfsl_stateid.other[1];
        *tl++ = lp->nfsl_stateid.other[2];
        txdr_hyper(off, tl);
        tl += 2;
        txdr_hyper(len, tl);
        if (syscred)
                nd->nd_flag |= ND_USEGSSNAME;
        error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
            NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
        /* Done ahead of the error check, so it also runs on failure. */
        NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
        if (error)
                return (error);
        if (nd->nd_repstat == 0) {
                /* Save the stateid returned by the server. */
                NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
                lp->nfsl_stateid.seqid = *tl++;
                lp->nfsl_stateid.other[0] = *tl++;
                lp->nfsl_stateid.other[1] = *tl++;
                lp->nfsl_stateid.other[2] = *tl;
        } else if (nd->nd_repstat == NFSERR_STALESTATEID)
                nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
nfsmout:
        m_freem(nd->nd_mrep);
        return (error);
}
4205
/*
 * The actual Lock RPC.
 *
 * Sends a Lock for the range off/len on the file handle nfhp/fhlen.
 * When "newone" is non-zero the request names the open (open owner
 * seqid and open stateid) and introduces the lock owner (lock seqid,
 * clientid and an owner string of lp->nfsl_owner followed by the file
 * handle); otherwise it carries lp's existing lock stateid and seqid.
 * When the reply status is 0, the stateid in the reply is stored in
 * lp->nfsl_stateid.  For NFSERR_DENIED the description of the
 * conflicting lock is parsed past and discarded.  For
 * NFSERR_STALESTATEID, recovery is initiated for the owning client.
 * A non-zero "syscred" sets ND_USEGSSNAME in nd->nd_flag.
 * Returns the error from the request or from parsing the reply; the
 * reply status is left in nd->nd_repstat for the caller.
 */
int
nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
    u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
    int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
    NFSPROC_T *p, int syscred)
{
        u_int32_t *tl;
        int error, size;
        uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
        struct nfsclsession *tsep;

        nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
            false);
        /*
         * Seven words: lock type, reclaim, offset (2), length (2) and
         * the true/false word that selects which owner form follows.
         */
        NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
        /* Anything other than F_RDLCK is requested as a write lock. */
        if (type == F_RDLCK)
                *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
        else
                *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
        *tl++ = txdr_unsigned(reclaim);
        txdr_hyper(off, tl);
        tl += 2;
        txdr_hyper(len, tl);
        tl += 2;
        if (newone) {
            *tl = newnfs_true;
            /*
             * Open owner seqid, open stateid, lock seqid and clientid,
             * followed by the owner string.
             */
            NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
                2 * NFSX_UNSIGNED + NFSX_HYPER);
            *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
            /* The stateid's seqid word is sent as 0 when NFSHASNFSV4N(nmp). */
            if (NFSHASNFSV4N(nmp))
                *tl++ = 0;
            else
                *tl++ = lp->nfsl_open->nfso_stateid.seqid;
            *tl++ = lp->nfsl_open->nfso_stateid.other[0];
            *tl++ = lp->nfsl_open->nfso_stateid.other[1];
            *tl++ = lp->nfsl_open->nfso_stateid.other[2];
            *tl++ = txdr_unsigned(lp->nfsl_seqid);
            tsep = nfsmnt_mdssession(nmp);
            *tl++ = tsep->nfsess_clientid.lval[0];
            *tl = tsep->nfsess_clientid.lval[1];
            NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
            NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
            (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
        } else {
            *tl = newnfs_false;
            /* Existing lock stateid and the lock seqid. */
            NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
            if (NFSHASNFSV4N(nmp))
                *tl++ = 0;
            else
                *tl++ = lp->nfsl_stateid.seqid;
            *tl++ = lp->nfsl_stateid.other[0];
            *tl++ = lp->nfsl_stateid.other[1];
            *tl++ = lp->nfsl_stateid.other[2];
            *tl = txdr_unsigned(lp->nfsl_seqid);
            /*
             * When nfstest_outofseq is non-zero, the seqid is randomly
             * replaced by seqid + 1 (about once in nfstest_outofseq calls).
             */
            if (nfstest_outofseq &&
                (arc4random() % nfstest_outofseq) == 0)
                    *tl = txdr_unsigned(lp->nfsl_seqid + 1);
        }
        if (syscred)
                nd->nd_flag |= ND_USEGSSNAME;
        error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
            NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
        if (error)
                return (error);
        /* The open owner's seqid was only sent in the "newone" form. */
        if (newone)
            NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
        NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
        if (nd->nd_repstat == 0) {
                /* Save the stateid returned by the server. */
                NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
                lp->nfsl_stateid.seqid = *tl++;
                lp->nfsl_stateid.other[0] = *tl++;
                lp->nfsl_stateid.other[1] = *tl++;
                lp->nfsl_stateid.other[2] = *tl;
        } else if (nd->nd_repstat == NFSERR_DENIED) {
                /*
                 * Only the eighth word, the length of the owner string,
                 * is looked at, so that the string can be stepped over.
                 */
                NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
                size = fxdr_unsigned(int, *(tl + 7));
                if (size < 0 || size > NFSV4_OPAQUELIMIT)
                        error = EBADRPC;
                if (!error)
                        error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
        } else if (nd->nd_repstat == NFSERR_STALESTATEID)
                nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
nfsmout:
        m_freem(nd->nd_mrep);
        return (error);
}
4294
/*
 * nfs statfs rpc
 * (always called with the vp for the mount point)
 *
 * For an NFSv4 mount this is a Getattr of the attributes selected by
 * NFSSTATFS_GETATTRBIT(), with nfsv4_loadattr() filling in sbp, fsp and
 * nap; on success the mount's nm_fsid is set from nap->na_filesid and
 * *attrflagp is set to 1.
 * Otherwise it is an FSSTAT request whose reply is decoded here into
 * sbp, using the hyper fields when NFSHASNFSV3(nmp) and the five 32 bit
 * fields when the mount is neither V3 nor V4.  fsp is not touched on
 * this path.
 * Returns 0, the error from the request/decoding or the reply status.
 */
int
nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
    void *stuff)
{
        /* Only set by the NFSM_DISSECT() on the non-V4 path below. */
        u_int32_t *tl = NULL;
        struct nfsrv_descript nfsd, *nd = &nfsd;
        struct nfsmount *nmp;
        nfsattrbit_t attrbits;
        int error;

        *attrflagp = 0;
        nmp = VFSTONFS(vp->v_mount);
        if (NFSHASNFSV4(nmp)) {
                /*
                 * For V4, you actually do a getattr.
                 */
                NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
                NFSSTATFS_GETATTRBIT(&attrbits);
                (void) nfsrv_putattrbit(nd, &attrbits);
                nd->nd_flag |= ND_USEGSSNAME;
                error = nfscl_request(nd, vp, p, cred, stuff);
                if (error)
                        return (error);
                if (nd->nd_repstat == 0) {
                        error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
                            NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p,
                            cred);
                        if (!error) {
                                /* Record the fsid in the mount structure. */
                                nmp->nm_fsid[0] = nap->na_filesid[0];
                                nmp->nm_fsid[1] = nap->na_filesid[1];
                                NFSSETHASSETFSID(nmp);
                                *attrflagp = 1;
                        }
                } else {
                        error = nd->nd_repstat;
                }
                if (error)
                        goto nfsmout;
        } else {
                NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp);
                error = nfscl_request(nd, vp, p, cred, stuff);
                if (error)
                        return (error);
                /*
                 * For V3 the post-op attributes are parsed before the
                 * reply status is looked at.
                 */
                if (nd->nd_flag & ND_NFSV3) {
                        error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
                        if (error)
                                goto nfsmout;
                }
                if (nd->nd_repstat) {
                        error = nd->nd_repstat;
                        goto nfsmout;
                }
                NFSM_DISSECT(tl, u_int32_t *,
                    NFSX_STATFS(nd->nd_flag & ND_NFSV3));
        }
        /*
         * NOTE(review): a V4 mount that gets here is expected to match
         * neither test below, so "tl" is never used while still NULL -
         * this presumes NFSHASNFSV3() is false for a V4 mount; confirm.
         */
        if (NFSHASNFSV3(nmp)) {
                sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
                sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
                sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
                sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
                sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
                sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
                sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
        } else if (NFSHASNFSV4(nmp) == 0) {
                sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
                sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
                sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
                sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
                sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
        }
nfsmout:
        m_freem(nd->nd_mrep);
        return (error);
}
4374
4375 /*
4376  * nfs pathconf rpc
4377  */
4378 int
4379 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4380     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4381     void *stuff)
4382 {
4383         struct nfsrv_descript nfsd, *nd = &nfsd;
4384         struct nfsmount *nmp;
4385         u_int32_t *tl;
4386         nfsattrbit_t attrbits;
4387         int error;
4388
4389         *attrflagp = 0;
4390         nmp = VFSTONFS(vp->v_mount);
4391         if (NFSHASNFSV4(nmp)) {
4392                 /*
4393                  * For V4, you actually do a getattr.
4394                  */
4395                 NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4396                 NFSPATHCONF_GETATTRBIT(&attrbits);
4397                 (void) nfsrv_putattrbit(nd, &attrbits);
4398                 nd->nd_flag |= ND_USEGSSNAME;
4399                 error = nfscl_request(nd, vp, p, cred, stuff);
4400                 if (error)
4401                         return (error);
4402                 if (nd->nd_repstat == 0) {
4403                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4404                             pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4405                             cred);
4406                         if (!error)
4407                                 *attrflagp = 1;
4408                 } else {
4409                         error = nd->nd_repstat;
4410                 }
4411         } else {
4412                 NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp);
4413                 error = nfscl_request(nd, vp, p, cred, stuff);
4414                 if (error)
4415                         return (error);
4416                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4417                 if (nd->nd_repstat && !error)
4418                         error = nd->nd_repstat;
4419                 if (!error) {
4420                         NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4421                         pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4422                         pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4423                         pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4424                         pc->pc_chownrestricted =
4425                             fxdr_unsigned(u_int32_t, *tl++);
4426                         pc->pc_caseinsensitive =
4427                             fxdr_unsigned(u_int32_t, *tl++);
4428                         pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4429                 }
4430         }
4431 nfsmout:
4432         m_freem(nd->nd_mrep);
4433         return (error);
4434 }
4435
4436 /*
4437  * nfs version 3 fsinfo rpc call
4438  */
4439 int
4440 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4441     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
4442 {
4443         u_int32_t *tl;
4444         struct nfsrv_descript nfsd, *nd = &nfsd;
4445         int error;
4446
4447         *attrflagp = 0;
4448         NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp);
4449         error = nfscl_request(nd, vp, p, cred, stuff);
4450         if (error)
4451                 return (error);
4452         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4453         if (nd->nd_repstat && !error)
4454                 error = nd->nd_repstat;
4455         if (!error) {
4456                 NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4457                 fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4458                 fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4459                 fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4460                 fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4461                 fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4462                 fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4463                 fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4464                 fsp->fs_maxfilesize = fxdr_hyper(tl);
4465                 tl += 2;
4466                 fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4467                 tl += 2;
4468                 fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4469         }
4470 nfsmout:
4471         m_freem(nd->nd_mrep);
4472         return (error);
4473 }
4474
4475 /*
4476  * This function performs the Renew RPC.
4477  */
4478 int
4479 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4480     NFSPROC_T *p)
4481 {
4482         u_int32_t *tl;
4483         struct nfsrv_descript nfsd;
4484         struct nfsrv_descript *nd = &nfsd;
4485         struct nfsmount *nmp;
4486         int error;
4487         struct nfssockreq *nrp;
4488         struct nfsclsession *tsep;
4489
4490         nmp = clp->nfsc_nmp;
4491         if (nmp == NULL)
4492                 return (0);
4493         if (dsp == NULL)
4494                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
4495                     0, false);
4496         else
4497                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4498                     &dsp->nfsclds_sess, 0, 0, false);
4499         if (!NFSHASNFSV4N(nmp)) {
4500                 /* NFSv4.1 just uses a Sequence Op and not a Renew. */
4501                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4502                 tsep = nfsmnt_mdssession(nmp);
4503                 *tl++ = tsep->nfsess_clientid.lval[0];
4504                 *tl = tsep->nfsess_clientid.lval[1];
4505         }
4506         nrp = NULL;
4507         if (dsp != NULL)
4508                 nrp = dsp->nfsclds_sockp;
4509         if (nrp == NULL)
4510                 /* If NULL, use the MDS socket. */
4511                 nrp = &nmp->nm_sockreq;
4512         nd->nd_flag |= ND_USEGSSNAME;
4513         if (dsp == NULL)
4514                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4515                     NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4516         else {
4517                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4518                     NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4519                 if (error == ENXIO)
4520                         nfscl_cancelreqs(dsp);
4521         }
4522         if (error)
4523                 return (error);
4524         error = nd->nd_repstat;
4525         m_freem(nd->nd_mrep);
4526         return (error);
4527 }
4528
4529 /*
4530  * This function performs the Releaselockowner RPC.
4531  */
4532 int
4533 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4534     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4535 {
4536         struct nfsrv_descript nfsd, *nd = &nfsd;
4537         u_int32_t *tl;
4538         int error;
4539         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4540         struct nfsclsession *tsep;
4541
4542         if (NFSHASNFSV4N(nmp)) {
4543                 /* For NFSv4.1, do a FreeStateID. */
4544                 nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4545                     NULL, 0, 0, false);
4546                 nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4547         } else {
4548                 nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4549                     NULL, 0, 0, false);
4550                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4551                 tsep = nfsmnt_mdssession(nmp);
4552                 *tl++ = tsep->nfsess_clientid.lval[0];
4553                 *tl = tsep->nfsess_clientid.lval[1];
4554                 NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4555                 NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4556                 (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4557         }
4558         nd->nd_flag |= ND_USEGSSNAME;
4559         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4560             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4561         if (error)
4562                 return (error);
4563         error = nd->nd_repstat;
4564         m_freem(nd->nd_mrep);
4565         return (error);
4566 }
4567
/*
 * This function performs the Compound to get the mount pt FH.
 *
 * The request is a PutRootFH (started by nfscl_reqstart()), one Lookup
 * for every non-empty component of "dirpath" and a final GetFH.  On
 * success the file handle in the reply is copied into nmp->nm_fh and
 * its length is stored in nmp->nm_fhsize.
 * Each '/' that ends a component is overwritten with '\0' while that
 * component is added to the request and is then put back, so "dirpath"
 * must be writable but holds its original contents on return.
 * Returns the error from the request or from parsing the reply, else
 * the reply status (NFSERR_BADXDR for a file handle length that is not
 * in the range 1 to NFSX_FHMAX).
 */
int
nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
    NFSPROC_T *p)
{
        u_int32_t *tl;
        struct nfsrv_descript nfsd;
        struct nfsrv_descript *nd = &nfsd;
        u_char *cp, *cp2;
        int error, cnt, len, setnil;
        u_int32_t *opcntp;

        /* opcntp is where the operation count gets filled in below. */
        nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
            0, false);
        cp = dirpath;
        cnt = 0;
        do {
                setnil = 0;
                /* Skip any leading or repeated '/'. */
                while (*cp == '/')
                        cp++;
                /* Find the end of this component. */
                cp2 = cp;
                while (*cp2 != '\0' && *cp2 != '/')
                        cp2++;
                /* Terminate the component in place so strlen() works. */
                if (*cp2 == '/') {
                        setnil = 1;
                        *cp2 = '\0';
                }
                /* Add a Lookup unless the component is empty. */
                if (cp2 != cp) {
                        NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
                        *tl = txdr_unsigned(NFSV4OP_LOOKUP);
                        nfsm_strtom(nd, cp, strlen(cp));
                        cnt++;
                }
                /* Put the '/' back and continue after it. */
                if (setnil)
                        *cp2++ = '/';
                cp = cp2;
        } while (*cp != '\0');
        /* PutRootFH and GetFH, plus one Lookup per component. */
        if (NFSHASNFSV4N(nmp))
                /* Has a Sequence Op done by nfscl_reqstart(). */
                *opcntp = txdr_unsigned(3 + cnt);
        else
                *opcntp = txdr_unsigned(2 + cnt);
        NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
        *tl = txdr_unsigned(NFSV4OP_GETFH);
        nd->nd_flag |= ND_USEGSSNAME;
        error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
                NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
        if (error)
                return (error);
        if (nd->nd_repstat == 0) {
                /*
                 * Step over 2 + 2 * cnt words to reach the length of the
                 * file handle.
                 * NOTE(review): presumably two words of op/status for
                 * each Lookup and for the GetFH - confirm against the
                 * reply handling in newnfs_request().
                 */
                NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
                tl += (2 + 2 * cnt);
                if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
                        len > NFSX_FHMAX) {
                        nd->nd_repstat = NFSERR_BADXDR;
                } else {
                        nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len);
                        if (nd->nd_repstat == 0)
                                nmp->nm_fhsize = len;
                }
        }
        error = nd->nd_repstat;
nfsmout:
        m_freem(nd->nd_mrep);
        return (error);
}
4636
4637 /*
4638  * This function performs the Delegreturn RPC.
4639  */
4640 int
4641 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
4642     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
4643 {
4644         u_int32_t *tl;
4645         struct nfsrv_descript nfsd;
4646         struct nfsrv_descript *nd = &nfsd;
4647         int error;
4648
4649         nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
4650             dp->nfsdl_fhlen, NULL, NULL, 0, 0, false);
4651         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
4652         if (NFSHASNFSV4N(nmp))
4653                 *tl++ = 0;
4654         else
4655                 *tl++ = dp->nfsdl_stateid.seqid;
4656         *tl++ = dp->nfsdl_stateid.other[0];
4657         *tl++ = dp->nfsdl_stateid.other[1];
4658         *tl = dp->nfsdl_stateid.other[2];
4659         if (syscred)
4660                 nd->nd_flag |= ND_USEGSSNAME;
4661         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4662             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4663         if (error)
4664                 return (error);
4665         error = nd->nd_repstat;
4666         m_freem(nd->nd_mrep);
4667         return (error);
4668 }
4669
4670 /*
4671  * nfs getacl call.
4672  */
4673 int
4674 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4675     struct acl *aclp, void *stuff)
4676 {
4677         struct nfsrv_descript nfsd, *nd = &nfsd;
4678         int error;
4679         nfsattrbit_t attrbits;
4680         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4681         
4682         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4683                 return (EOPNOTSUPP);
4684         NFSCL_REQSTART(nd, NFSPROC_GETACL, vp);
4685         NFSZERO_ATTRBIT(&attrbits);
4686         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4687         (void) nfsrv_putattrbit(nd, &attrbits);
4688         error = nfscl_request(nd, vp, p, cred, stuff);
4689         if (error)
4690                 return (error);
4691         if (!nd->nd_repstat)
4692                 error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
4693                     NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
4694         else
4695                 error = nd->nd_repstat;
4696         m_freem(nd->nd_mrep);
4697         return (error);
4698 }
4699
4700 /*
4701  * nfs setacl call.
4702  */
4703 int
4704 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4705     struct acl *aclp, void *stuff)
4706 {
4707         int error;
4708         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4709         
4710         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4711                 return (EOPNOTSUPP);
4712         error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff);
4713         return (error);
4714 }
4715
4716 /*
4717  * nfs setacl call.
4718  */
4719 static int
4720 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4721     struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff)
4722 {
4723         struct nfsrv_descript nfsd, *nd = &nfsd;
4724         int error;
4725         nfsattrbit_t attrbits;
4726         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4727         
4728         if (!NFSHASNFSV4(nmp))
4729                 return (EOPNOTSUPP);
4730         NFSCL_REQSTART(nd, NFSPROC_SETACL, vp);
4731         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
4732         NFSZERO_ATTRBIT(&attrbits);
4733         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4734         (void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0,
4735             &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
4736         error = nfscl_request(nd, vp, p, cred, stuff);
4737         if (error)
4738                 return (error);
4739         /* Don't care about the pre/postop attributes */
4740         m_freem(nd->nd_mrep);
4741         return (nd->nd_repstat);
4742 }
4743
4744 /*
4745  * Do the NFSv4.1 Exchange ID.
4746  */
4747 int
4748 nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
4749     struct nfssockreq *nrp, int minorvers, uint32_t exchflags,
4750     struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p)
4751 {
4752         uint32_t *tl, v41flags;
4753         struct nfsrv_descript nfsd;
4754         struct nfsrv_descript *nd = &nfsd;
4755         struct nfsclds *dsp;
4756         struct timespec verstime;
4757         int error, len;
4758
4759         *dspp = NULL;
4760         if (minorvers == 0)
4761                 minorvers = nmp->nm_minorvers;
4762         nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL,
4763             NFS_VER4, minorvers, false);
4764         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4765         *tl++ = txdr_unsigned(nfsboottime.tv_sec);      /* Client owner */
4766         *tl = txdr_unsigned(clp->nfsc_rev);
4767         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
4768
4769         NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
4770         *tl++ = txdr_unsigned(exchflags);
4771         *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
4772
4773         /* Set the implementation id4 */
4774         *tl = txdr_unsigned(1);
4775         (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
4776         (void) nfsm_strtom(nd, version, strlen(version));
4777         NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
4778         verstime.tv_sec = 1293840000;           /* Jan 1, 2011 */
4779         verstime.tv_nsec = 0;
4780         txdr_nfsv4time(&verstime, tl);
4781         nd->nd_flag |= ND_USEGSSNAME;
4782         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4783             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4784         NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
4785             (int)nd->nd_repstat);
4786         if (error != 0)
4787                 return (error);
4788         if (nd->nd_repstat == 0) {
4789                 NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
4790                 len = fxdr_unsigned(int, *(tl + 7));
4791                 if (len < 0 || len > NFSV4_OPAQUELIMIT) {
4792                         error = NFSERR_BADXDR;
4793                         goto nfsmout;
4794                 }
4795                 dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
4796                     M_WAITOK | M_ZERO);
4797                 dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
4798                 dsp->nfsclds_servownlen = len;
4799                 dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
4800                 dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
4801                 dsp->nfsclds_sess.nfsess_sequenceid =
4802                     fxdr_unsigned(uint32_t, *tl++);
4803                 v41flags = fxdr_unsigned(uint32_t, *tl);
4804                 if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
4805                     NFSHASPNFSOPT(nmp)) {
4806                         NFSCL_DEBUG(1, "set PNFS\n");
4807                         NFSLOCKMNT(nmp);
4808                         nmp->nm_state |= NFSSTA_PNFS;
4809                         NFSUNLOCKMNT(nmp);
4810                         dsp->nfsclds_flags |= NFSCLDS_MDS;
4811                 }
4812                 if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
4813                         dsp->nfsclds_flags |= NFSCLDS_DS;
4814                 if (minorvers == NFSV42_MINORVERSION)
4815                         dsp->nfsclds_flags |= NFSCLDS_MINORV2;
4816                 if (len > 0)
4817                         nd->nd_repstat = nfsrv_mtostr(nd,
4818                             dsp->nfsclds_serverown, len);
4819                 if (nd->nd_repstat == 0) {
4820                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
4821                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
4822                             NULL, MTX_DEF);
4823                         nfscl_initsessionslots(&dsp->nfsclds_sess);
4824                         *dspp = dsp;
4825                 } else
4826                         free(dsp, M_NFSCLDS);
4827         }
4828         error = nd->nd_repstat;
4829 nfsmout:
4830         m_freem(nd->nd_mrep);
4831         return (error);
4832 }
4833
4834 /*
4835  * Do the NFSv4.1 Create Session.
4836  */
4837 int
4838 nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
4839     struct nfssockreq *nrp, struct nfsclds *dsp, uint32_t sequenceid, int mds,
4840     struct ucred *cred, NFSPROC_T *p)
4841 {
4842         uint32_t crflags, maxval, *tl;
4843         struct nfsrv_descript nfsd;
4844         struct nfsrv_descript *nd = &nfsd;
4845         int error, irdcnt, minorvers;
4846
4847         /* Make sure nm_rsize, nm_wsize is set. */
4848         if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
4849                 nmp->nm_rsize = NFS_MAXBSIZE;
4850         if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
4851                 nmp->nm_wsize = NFS_MAXBSIZE;
4852         if (dsp == NULL)
4853                 minorvers = nmp->nm_minorvers;
4854         else if ((dsp->nfsclds_flags & NFSCLDS_MINORV2) != 0)
4855                 minorvers = NFSV42_MINORVERSION;
4856         else
4857                 minorvers = NFSV41_MINORVERSION;
4858         nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL,
4859             NFS_VER4, minorvers, false);
4860         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4861         *tl++ = sep->nfsess_clientid.lval[0];
4862         *tl++ = sep->nfsess_clientid.lval[1];
4863         *tl++ = txdr_unsigned(sequenceid);
4864         crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
4865         if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
4866                 crflags |= NFSV4CRSESS_CONNBACKCHAN;
4867         *tl = txdr_unsigned(crflags);
4868
4869         /* Fill in fore channel attributes. */
4870         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4871         *tl++ = 0;                              /* Header pad size */
4872         if ((nd->nd_flag & ND_NFSV42) != 0 && mds != 0 && sb_max_adj >=
4873             nmp->nm_wsize && sb_max_adj >= nmp->nm_rsize) {
4874                 /*
4875                  * NFSv4.2 Extended Attribute operations may want to do
4876                  * requests/replies that are larger than nm_rsize/nm_wsize.
4877                  */
4878                 *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
4879                 *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
4880         } else {
4881                 *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);
4882                 *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);
4883         }
4884         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
4885         *tl++ = txdr_unsigned(20);              /* Max operations */
4886         *tl++ = txdr_unsigned(64);              /* Max slots */
4887         *tl = 0;                                /* No rdma ird */
4888
4889         /* Fill in back channel attributes. */
4890         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4891         *tl++ = 0;                              /* Header pad size */
4892         *tl++ = txdr_unsigned(10000);           /* Max request size */
4893         *tl++ = txdr_unsigned(10000);           /* Max response size */
4894         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
4895         *tl++ = txdr_unsigned(4);               /* Max operations */
4896         *tl++ = txdr_unsigned(NFSV4_CBSLOTS);   /* Max slots */
4897         *tl = 0;                                /* No rdma ird */
4898
4899         NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
4900         *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */
4901
4902         /* Allow AUTH_SYS callbacks as uid, gid == 0. */
4903         *tl++ = txdr_unsigned(1);               /* Auth_sys only */
4904         *tl++ = txdr_unsigned(AUTH_SYS);        /* AUTH_SYS type */
4905         *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
4906         *tl++ = 0;                              /* Null machine name */
4907         *tl++ = 0;                              /* Uid == 0 */
4908         *tl++ = 0;                              /* Gid == 0 */
4909         *tl = 0;                                /* No additional gids */
4910         nd->nd_flag |= ND_USEGSSNAME;
4911         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
4912             NFS_VER4, NULL, 1, NULL, NULL);
4913         if (error != 0)
4914                 return (error);
4915         if (nd->nd_repstat == 0) {
4916                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
4917                     2 * NFSX_UNSIGNED);
4918                 bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
4919                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
4920                 sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
4921                 crflags = fxdr_unsigned(uint32_t, *tl);
4922                 if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
4923                         NFSLOCKMNT(nmp);
4924                         nmp->nm_state |= NFSSTA_SESSPERSIST;
4925                         NFSUNLOCKMNT(nmp);
4926                 }
4927
4928                 /* Get the fore channel slot count. */
4929                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4930                 tl++;                   /* Skip the header pad size. */
4931
4932                 /* Make sure nm_wsize is small enough. */
4933                 maxval = fxdr_unsigned(uint32_t, *tl++);
4934                 while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
4935                         if (nmp->nm_wsize > 8096)
4936                                 nmp->nm_wsize /= 2;
4937                         else
4938                                 break;
4939                 }
4940                 sep->nfsess_maxreq = maxval;
4941
4942                 /* Make sure nm_rsize is small enough. */
4943                 maxval = fxdr_unsigned(uint32_t, *tl++);
4944                 while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
4945                         if (nmp->nm_rsize > 8096)
4946                                 nmp->nm_rsize /= 2;
4947                         else
4948                                 break;
4949                 }
4950                 sep->nfsess_maxresp = maxval;
4951
4952                 sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
4953                 tl++;
4954                 sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
4955                 NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
4956                 irdcnt = fxdr_unsigned(int, *tl);
4957                 if (irdcnt > 0)
4958                         NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
4959
4960                 /* and the back channel slot count. */
4961                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4962                 tl += 5;
4963                 sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
4964                 NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
4965         }
4966         error = nd->nd_repstat;
4967 nfsmout:
4968         m_freem(nd->nd_mrep);
4969         return (error);
4970 }
4971
4972 /*
4973  * Do the NFSv4.1 Destroy Session.
4974  */
4975 int
4976 nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp,
4977     struct ucred *cred, NFSPROC_T *p)
4978 {
4979         uint32_t *tl;
4980         struct nfsrv_descript nfsd;
4981         struct nfsrv_descript *nd = &nfsd;
4982         int error;
4983         struct nfsclsession *tsep;
4984
4985         nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL, 0,
4986             0, false);
4987         NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
4988         tsep = nfsmnt_mdssession(nmp);
4989         bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID);
4990         nd->nd_flag |= ND_USEGSSNAME;
4991         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4992             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4993         if (error != 0)
4994                 return (error);
4995         error = nd->nd_repstat;
4996         m_freem(nd->nd_mrep);
4997         return (error);
4998 }
4999
5000 /*
5001  * Do the NFSv4.1 Destroy Client.
5002  */
5003 int
5004 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
5005     struct ucred *cred, NFSPROC_T *p)
5006 {
5007         uint32_t *tl;
5008         struct nfsrv_descript nfsd;
5009         struct nfsrv_descript *nd = &nfsd;
5010         int error;
5011         struct nfsclsession *tsep;
5012
5013         nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0,
5014             0, false);
5015         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5016         tsep = nfsmnt_mdssession(nmp);
5017         *tl++ = tsep->nfsess_clientid.lval[0];
5018         *tl = tsep->nfsess_clientid.lval[1];
5019         nd->nd_flag |= ND_USEGSSNAME;
5020         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5021             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5022         if (error != 0)
5023                 return (error);
5024         error = nd->nd_repstat;
5025         m_freem(nd->nd_mrep);
5026         return (error);
5027 }
5028
5029 /*
5030  * Do the NFSv4.1 LayoutGet.
5031  */
5032 static int
5033 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
5034     uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
5035     int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
5036     struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p,
5037     void *stuff)
5038 {
5039         struct nfsrv_descript nfsd, *nd = &nfsd;
5040         int error;
5041
5042         nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
5043             0, false);
5044         nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
5045             layouttype, layoutlen, 0);
5046         nd->nd_flag |= ND_USEGSSNAME;
5047         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5048             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5049         NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
5050         if (error != 0)
5051                 return (error);
5052         if (nd->nd_repstat == 0)
5053                 error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep,
5054                     flhp);
5055         if (error == 0 && nd->nd_repstat != 0)
5056                 error = nd->nd_repstat;
5057         m_freem(nd->nd_mrep);
5058         return (error);
5059 }
5060
5061 /*
5062  * Do the NFSv4.1 Get Device Info.
5063  */
5064 int
5065 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
5066     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
5067     NFSPROC_T *p)
5068 {
5069         uint32_t cnt, *tl, vers, minorvers;
5070         struct nfsrv_descript nfsd;
5071         struct nfsrv_descript *nd = &nfsd;
5072         struct sockaddr_in sin, ssin;
5073         struct sockaddr_in6 sin6, ssin6;
5074         struct nfsclds *dsp = NULL, **dspp, **gotdspp;
5075         struct nfscldevinfo *ndi;
5076         int addrcnt = 0, bitcnt, error, gotminor, gotvers, i, isudp, j;
5077         int stripecnt;
5078         uint8_t stripeindex;
5079         sa_family_t af, safilled;
5080
5081         ssin.sin_port = 0;              /* To shut up compiler. */
5082         ssin.sin_addr.s_addr = 0;       /* ditto */
5083         *ndip = NULL;
5084         ndi = NULL;
5085         gotdspp = NULL;
5086         nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0,
5087             0, false);
5088         NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5089         NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
5090         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5091         *tl++ = txdr_unsigned(layouttype);
5092         *tl++ = txdr_unsigned(100000);
5093         if (notifybitsp != NULL && *notifybitsp != 0) {
5094                 *tl = txdr_unsigned(1);         /* One word of bits. */
5095                 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5096                 *tl = txdr_unsigned(*notifybitsp);
5097         } else
5098                 *tl = txdr_unsigned(0);
5099         nd->nd_flag |= ND_USEGSSNAME;
5100         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5101             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5102         if (error != 0)
5103                 return (error);
5104         if (nd->nd_repstat == 0) {
5105                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5106                 if (layouttype != fxdr_unsigned(int, *tl))
5107                         printf("EEK! devinfo layout type not same!\n");
5108                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
5109                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5110                         stripecnt = fxdr_unsigned(int, *tl);
5111                         NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
5112                         if (stripecnt < 1 || stripecnt > 4096) {
5113                                 printf("pNFS File layout devinfo stripecnt %d:"
5114                                     " out of range\n", stripecnt);
5115                                 error = NFSERR_BADXDR;
5116                                 goto nfsmout;
5117                         }
5118                         NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
5119                             NFSX_UNSIGNED);
5120                         addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
5121                         NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
5122                         if (addrcnt < 1 || addrcnt > 128) {
5123                                 printf("NFS devinfo addrcnt %d: out of range\n",
5124                                     addrcnt);
5125                                 error = NFSERR_BADXDR;
5126                                 goto nfsmout;
5127                         }
5128         
5129                         /*
5130                          * Now we know how many stripe indices and addresses, so
5131                          * we can allocate the structure the correct size.
5132                          */
5133                         i = (stripecnt * sizeof(uint8_t)) /
5134                             sizeof(struct nfsclds *) + 1;
5135                         NFSCL_DEBUG(4, "stripeindices=%d\n", i);
5136                         ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
5137                             sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
5138                             M_ZERO);
5139                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5140                             NFSX_V4DEVICEID);
5141                         ndi->nfsdi_refcnt = 0;
5142                         ndi->nfsdi_flags = NFSDI_FILELAYOUT;
5143                         ndi->nfsdi_stripecnt = stripecnt;
5144                         ndi->nfsdi_addrcnt = addrcnt;
5145                         /* Fill in the stripe indices. */
5146                         for (i = 0; i < stripecnt; i++) {
5147                                 stripeindex = fxdr_unsigned(uint8_t, *tl++);
5148                                 NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
5149                                 if (stripeindex >= addrcnt) {
5150                                         printf("pNFS File Layout devinfo"
5151                                             " stripeindex %d: too big\n",
5152                                             (int)stripeindex);
5153                                         error = NFSERR_BADXDR;
5154                                         goto nfsmout;
5155                                 }
5156                                 nfsfldi_setstripeindex(ndi, i, stripeindex);
5157                         }
5158                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
5159                         /* For Flex File, we only get one address list. */
5160                         ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
5161                             M_NFSDEVINFO, M_WAITOK | M_ZERO);
5162                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5163                             NFSX_V4DEVICEID);
5164                         ndi->nfsdi_refcnt = 0;
5165                         ndi->nfsdi_flags = NFSDI_FLEXFILE;
5166                         addrcnt = ndi->nfsdi_addrcnt = 1;
5167                 }
5168
5169                 /* Now, dissect the server address(es). */
5170                 safilled = AF_UNSPEC;
5171                 for (i = 0; i < addrcnt; i++) {
5172                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5173                         cnt = fxdr_unsigned(uint32_t, *tl);
5174                         if (cnt == 0) {
5175                                 printf("NFS devinfo 0 len addrlist\n");
5176                                 error = NFSERR_BADXDR;
5177                                 goto nfsmout;
5178                         }
5179                         dspp = nfsfldi_addr(ndi, i);
5180                         safilled = AF_UNSPEC;
5181                         for (j = 0; j < cnt; j++) {
5182                                 error = nfsv4_getipaddr(nd, &sin, &sin6, &af,
5183                                     &isudp);
5184                                 if (error != 0 && error != EPERM) {
5185                                         error = NFSERR_BADXDR;
5186                                         goto nfsmout;
5187                                 }
5188                                 if (error == 0 && isudp == 0) {
5189                                         /*
5190                                          * The priority is:
5191                                          * - Same address family.
5192                                          * Save the address and dspp, so that
5193                                          * the connection can be done after
5194                                          * parsing is complete.
5195                                          */
5196                                         if (safilled == AF_UNSPEC ||
5197                                             (af == nmp->nm_nam->sa_family &&
5198                                              safilled != nmp->nm_nam->sa_family)
5199                                            ) {
5200                                                 if (af == AF_INET)
5201                                                         ssin = sin;
5202                                                 else
5203                                                         ssin6 = sin6;
5204                                                 safilled = af;
5205                                                 gotdspp = dspp;
5206                                         }
5207                                 }
5208                         }
5209                 }
5210
5211                 gotvers = NFS_VER4;     /* Default NFSv4.1 for File Layout. */
5212                 gotminor = NFSV41_MINORVERSION;
5213                 /* For Flex File, we will take one of the versions to use. */
5214                 if (layouttype == NFSLAYOUT_FLEXFILE) {
5215                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5216                         j = fxdr_unsigned(int, *tl);
5217                         if (j < 1 || j > NFSDEV_MAXVERS) {
5218                                 printf("pNFS: too many versions\n");
5219                                 error = NFSERR_BADXDR;
5220                                 goto nfsmout;
5221                         }
5222                         gotvers = 0;
5223                         gotminor = 0;
5224                         for (i = 0; i < j; i++) {
5225                                 NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
5226                                 vers = fxdr_unsigned(uint32_t, *tl++);
5227                                 minorvers = fxdr_unsigned(uint32_t, *tl++);
5228                                 if (vers == NFS_VER3)
5229                                         minorvers = 0;
5230                                 if ((vers == NFS_VER4 && ((minorvers ==
5231                                     NFSV41_MINORVERSION && gotminor == 0) ||
5232                                     minorvers == NFSV42_MINORVERSION)) ||
5233                                     (vers == NFS_VER3 && gotvers == 0)) {
5234                                         gotvers = vers;
5235                                         gotminor = minorvers;
5236                                         /* We'll take this one. */
5237                                         ndi->nfsdi_versindex = i;
5238                                         ndi->nfsdi_vers = vers;
5239                                         ndi->nfsdi_minorvers = minorvers;
5240                                         ndi->nfsdi_rsize = fxdr_unsigned(
5241                                             uint32_t, *tl++);
5242                                         ndi->nfsdi_wsize = fxdr_unsigned(
5243                                             uint32_t, *tl++);
5244                                         if (*tl == newnfs_true)
5245                                                 ndi->nfsdi_flags |=
5246                                                     NFSDI_TIGHTCOUPLED;
5247                                         else
5248                                                 ndi->nfsdi_flags &=
5249                                                     ~NFSDI_TIGHTCOUPLED;
5250                                 }
5251                         }
5252                         if (gotvers == 0) {
5253                                 printf("pNFS: no NFSv3, NFSv4.1 or NFSv4.2\n");
5254                                 error = NFSERR_BADXDR;
5255                                 goto nfsmout;
5256                         }
5257                 }
5258
5259                 /* And the notify bits. */
5260                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5261                 bitcnt = fxdr_unsigned(int, *tl);
5262                 if (bitcnt > 0) {
5263                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5264                         if (notifybitsp != NULL)
5265                                 *notifybitsp =
5266                                     fxdr_unsigned(uint32_t, *tl);
5267                 }
5268                 if (safilled != AF_UNSPEC) {
5269                         KASSERT(ndi != NULL, ("ndi is NULL"));
5270                         *ndip = ndi;
5271                 } else
5272                         error = EPERM;
5273                 if (error == 0) {
5274                         /*
5275                          * Now we can do a TCP connection for the correct
5276                          * NFS version and IP address.
5277                          */
5278                         error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled,
5279                             gotvers, gotminor, &dsp, p);
5280                 }
5281                 if (error == 0) {
5282                         KASSERT(gotdspp != NULL, ("gotdspp is NULL"));
5283                         *gotdspp = dsp;
5284                 }
5285         }
5286         if (nd->nd_repstat != 0 && error == 0)
5287                 error = nd->nd_repstat;
5288 nfsmout:
5289         if (error != 0 && ndi != NULL)
5290                 nfscl_freedevinfo(ndi);
5291         m_freem(nd->nd_mrep);
5292         return (error);
5293 }
5294
5295 /*
5296  * Do the NFSv4.1 LayoutCommit.
5297  */
5298 int
5299 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5300     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5301     int layouttype, struct ucred *cred, NFSPROC_T *p, void *stuff)
5302 {
5303         uint32_t *tl;
5304         struct nfsrv_descript nfsd, *nd = &nfsd;
5305         int error;
5306
5307         nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL,
5308             0, 0, false);
5309         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5310             NFSX_STATEID);
5311         txdr_hyper(off, tl);
5312         tl += 2;
5313         txdr_hyper(len, tl);
5314         tl += 2;
5315         if (reclaim != 0)
5316                 *tl++ = newnfs_true;
5317         else
5318                 *tl++ = newnfs_false;
5319         *tl++ = txdr_unsigned(stateidp->seqid);
5320         *tl++ = stateidp->other[0];
5321         *tl++ = stateidp->other[1];
5322         *tl++ = stateidp->other[2];
5323         *tl++ = newnfs_true;
5324         if (lastbyte < off)
5325                 lastbyte = off;
5326         else if (lastbyte >= (off + len))
5327                 lastbyte = off + len - 1;
5328         txdr_hyper(lastbyte, tl);
5329         tl += 2;
5330         *tl++ = newnfs_false;
5331         *tl++ = txdr_unsigned(layouttype);
5332         /* All supported layouts are 0 length. */
5333         *tl = txdr_unsigned(0);
5334         nd->nd_flag |= ND_USEGSSNAME;
5335         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5336             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5337         if (error != 0)
5338                 return (error);
5339         error = nd->nd_repstat;
5340         m_freem(nd->nd_mrep);
5341         return (error);
5342 }
5343
5344 /*
5345  * Do the NFSv4.1 LayoutReturn.
5346  */
5347 int
5348 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5349     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5350     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5351     uint32_t stat, uint32_t op, char *devid)
5352 {
5353         uint32_t *tl;
5354         struct nfsrv_descript nfsd, *nd = &nfsd;
5355         uint64_t tu64;
5356         int error;
5357
5358         nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL,
5359             0, 0, false);
5360         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5361         if (reclaim != 0)
5362                 *tl++ = newnfs_true;
5363         else
5364                 *tl++ = newnfs_false;
5365         *tl++ = txdr_unsigned(layouttype);
5366         *tl++ = txdr_unsigned(iomode);
5367         *tl = txdr_unsigned(layoutreturn);
5368         if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5369                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5370                     NFSX_UNSIGNED);
5371                 txdr_hyper(offset, tl);
5372                 tl += 2;
5373                 txdr_hyper(len, tl);
5374                 tl += 2;
5375                 NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5376                 *tl++ = txdr_unsigned(stateidp->seqid);
5377                 *tl++ = stateidp->other[0];
5378                 *tl++ = stateidp->other[1];
5379                 *tl++ = stateidp->other[2];
5380                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
5381                         *tl = txdr_unsigned(0);
5382                 else if (layouttype == NFSLAYOUT_FLEXFILE) {
5383                         if (stat != 0) {
5384                                 *tl = txdr_unsigned(2 * NFSX_HYPER +
5385                                     NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5386                                     NFSX_UNSIGNED);
5387                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER +
5388                                     NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5389                                     NFSX_UNSIGNED);
5390                                 *tl++ = txdr_unsigned(1);       /* One error. */
5391                                 tu64 = 0;                       /* Offset. */
5392                                 txdr_hyper(tu64, tl); tl += 2;
5393                                 tu64 = UINT64_MAX;              /* Length. */
5394                                 txdr_hyper(tu64, tl); tl += 2;
5395                                 NFSBCOPY(stateidp, tl, NFSX_STATEID);
5396                                 tl += (NFSX_STATEID / NFSX_UNSIGNED);
5397                                 *tl++ = txdr_unsigned(1);       /* One error. */
5398                                 NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5399                                 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5400                                 *tl++ = txdr_unsigned(stat);
5401                                 *tl++ = txdr_unsigned(op);
5402                         } else {
5403                                 *tl = txdr_unsigned(2 * NFSX_UNSIGNED);
5404                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5405                                 /* No ioerrs. */
5406                                 *tl++ = 0;
5407                         }
5408                         *tl = 0;        /* No stats yet. */
5409                 }
5410         }
5411         nd->nd_flag |= ND_USEGSSNAME;
5412         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5413             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5414         if (error != 0)
5415                 return (error);
5416         if (nd->nd_repstat == 0) {
5417                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5418                 if (*tl != 0) {
5419                         NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5420                         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5421                         stateidp->other[0] = *tl++;
5422                         stateidp->other[1] = *tl++;
5423                         stateidp->other[2] = *tl;
5424                 }
5425         } else
5426                 error = nd->nd_repstat;
5427 nfsmout:
5428         m_freem(nd->nd_mrep);
5429         return (error);
5430 }
5431
5432 /*
5433  * Acquire a layout and devinfo, if possible. The caller must have acquired
5434  * a reference count on the nfsclclient structure before calling this.
5435  * Return the layout in lypp with a reference count on it, if successful.
5436  */
5437 static int
5438 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5439     int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
5440     struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5441 {
5442         struct nfscllayout *lyp;
5443         struct nfsclflayout *flp;
5444         struct nfsclflayouthead flh;
5445         int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
5446         nfsv4stateid_t stateid;
5447         struct nfsclsession *tsep;
5448
5449         *lypp = NULL;
5450         if (NFSHASFLEXFILE(nmp))
5451                 layouttype = NFSLAYOUT_FLEXFILE;
5452         else
5453                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5454         /*
5455          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5456          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5457          * flp == NULL.
5458          */
5459         lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5460             off, &flp, &recalled);
5461         islocked = 0;
5462         if (lyp == NULL || flp == NULL) {
5463                 if (recalled != 0)
5464                         return (EIO);
5465                 LIST_INIT(&flh);
5466                 tsep = nfsmnt_mdssession(nmp);
5467                 layoutlen = tsep->nfsess_maxcache -
5468                     (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5469                 if (lyp == NULL) {
5470                         stateid.seqid = 0;
5471                         stateid.other[0] = stateidp->other[0];
5472                         stateid.other[1] = stateidp->other[1];
5473                         stateid.other[2] = stateidp->other[2];
5474                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5475                             nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5476                             (uint64_t)0, layouttype, layoutlen, &stateid,
5477                             &retonclose, &flh, cred, p, NULL);
5478                 } else {
5479                         islocked = 1;
5480                         stateid.seqid = lyp->nfsly_stateid.seqid;
5481                         stateid.other[0] = lyp->nfsly_stateid.other[0];
5482                         stateid.other[1] = lyp->nfsly_stateid.other[1];
5483                         stateid.other[2] = lyp->nfsly_stateid.other[2];
5484                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5485                             nfhp->nfh_len, iomode, off, UINT64_MAX,
5486                             (uint64_t)0, layouttype, layoutlen, &stateid,
5487                             &retonclose, &flh, cred, p, NULL);
5488                 }
5489                 error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5490                     nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5491                     &flh, layouttype, error, NULL, cred, p);
5492                 if (error == 0)
5493                         *lypp = lyp;
5494                 else if (islocked != 0)
5495                         nfscl_rellayout(lyp, 1);
5496         } else
5497                 *lypp = lyp;
5498         return (error);
5499 }
5500
5501 /*
5502  * Do a TCP connection plus exchange id and create session.
5503  * If successful, a "struct nfsclds" is linked into the list for the
5504  * mount point and a pointer to it is returned.
      *
      * The address comes from sin when af is AF_INET and from sin6 when af is
      * AF_INET6; any other af fails with EPERM, as does a mount without an
      * nfsclclient.  If a non-defunct DS session for the same address and port
      * is already on nmp->nm_sess, it is returned in *dspp and no new
      * connection is made.
      *
      * For vers == NFS_VER4 an ExchangeID is done (retried once with
      * NFSV42_MINORVERSION after NFSERR_MINORVERMISMATCH) followed by a
      * CreateSession.  For any other vers no RPCs beyond the connect are done
      * and a bare nfsclds flagged NFSCLDS_DS is allocated here.
      *
      * Returns 0 with *dspp set on success, otherwise an error; *dspp is only
      * written on the success paths.
5505  */
5506 static int
5507 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
5508     struct sockaddr_in6 *sin6, sa_family_t af, int vers, int minorvers,
5509     struct nfsclds **dspp, NFSPROC_T *p)
5510 {
5511         struct sockaddr_in *msad, *sad;
5512         struct sockaddr_in6 *msad6, *sad6;
5513         struct nfsclclient *clp;
5514         struct nfssockreq *nrp;
5515         struct nfsclds *dsp, *tdsp;
5516         int error, firsttry;
5517         enum nfsclds_state retv;
5518         uint32_t sequenceid = 0;
5519
5520         KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5521             ("nfsrpc_fillsa: NULL nr_cred"));
             /* Read the client pointer while holding the client state lock. */
5522         NFSLOCKCLSTATE();
5523         clp = nmp->nm_clp;
5524         NFSUNLOCKCLSTATE();
5525         if (clp == NULL)
5526                 return (EPERM);
5527         if (af == AF_INET) {
5528                 NFSLOCKMNT(nmp);
5529                 /*
5530                  * Check to see if we already have a session for this
5531                  * address that is usable for a DS.
5532                  * Note that the MDS's address is in a different place
5533                  * than the sessions already acquired for DS's.
      *
      * So, the first list entry is compared against the
      * address in nm_sockreq and every later entry against
      * the address in its own nfsclds_sockp (or skipped when
      * it has none).
5534                  */
5535                 msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5536                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
5537                 while (tdsp != NULL) {
5538                         if (msad != NULL && msad->sin_family == AF_INET &&
5539                             sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
5540                             sin->sin_port == msad->sin_port &&
5541                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5542                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
5543                                 *dspp = tdsp;
5544                                 NFSUNLOCKMNT(nmp);
5545                                 NFSCL_DEBUG(4, "fnd same addr\n");
5546                                 return (0);
5547                         }
5548                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5549                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5550                                 msad = (struct sockaddr_in *)
5551                                     tdsp->nfsclds_sockp->nr_nam;
5552                         else
5553                                 msad = NULL;
5554                 }
5555                 NFSUNLOCKMNT(nmp);
5556
5557                 /* No IP address match, so look for new/trunked one. */
5558                 sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
5559                 sad->sin_len = sizeof(*sad);
5560                 sad->sin_family = AF_INET;
5561                 sad->sin_port = sin->sin_port;
5562                 sad->sin_addr.s_addr = sin->sin_addr.s_addr;
5563                 nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5564                 nrp->nr_nam = (struct sockaddr *)sad;
5565         } else if (af == AF_INET6) {
5566                 NFSLOCKMNT(nmp);
5567                 /*
5568                  * Check to see if we already have a session for this
5569                  * address that is usable for a DS.
5570                  * Note that the MDS's address is in a different place
5571                  * than the sessions already acquired for DS's.
5572                  */
5573                 msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
5574                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
5575                 while (tdsp != NULL) {
5576                         if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
5577                             IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
5578                             &msad6->sin6_addr) &&
5579                             sin6->sin6_port == msad6->sin6_port &&
5580                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5581                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
5582                                 *dspp = tdsp;
5583                                 NFSUNLOCKMNT(nmp);
5584                                 return (0);
5585                         }
5586                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5587                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5588                                 msad6 = (struct sockaddr_in6 *)
5589                                     tdsp->nfsclds_sockp->nr_nam;
5590                         else
5591                                 msad6 = NULL;
5592                 }
5593                 NFSUNLOCKMNT(nmp);
5594
5595                 /* No IP address match, so look for new/trunked one. */
5596                 sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
5597                 sad6->sin6_len = sizeof(*sad6);
5598                 sad6->sin6_family = AF_INET6;
5599                 sad6->sin6_port = sin6->sin6_port;
5600                 NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
5601                     sizeof(struct in6_addr));
5602                 nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5603                 nrp->nr_nam = (struct sockaddr *)sad6;
5604         } else
5605                 return (EPERM);
5606
             /* Fill in the rest of the new socket request structure. */
5607         nrp->nr_sotype = SOCK_STREAM;
5608         mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
5609         nrp->nr_prog = NFS_PROG;
5610         nrp->nr_vers = vers;
5611
5612         /*
5613          * Use the credentials that were used for the mount, which are
5614          * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
5615          * Ref. counting the credentials with crhold() is probably not
5616          * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
5617          * unmount, but I did it anyhow.
5618          */
5619         nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
5620         error = newnfs_connect(nmp, nrp, NULL, p, 0);
5621         NFSCL_DEBUG(3, "DS connect=%d\n", error);
5622
5623         dsp = NULL;
5624         /* Now, do the exchangeid and create session. */
5625         if (error == 0) {
5626                 if (vers == NFS_VER4) {
                             /*
                              * Try the requested minor version first and, if
                              * that gets NFSERR_MINORVERMISMATCH, try exactly
                              * once more with NFSV42_MINORVERSION.
                              */
5627                         firsttry = 0;
5628                         do {
5629                                 error = nfsrpc_exchangeid(nmp, clp, nrp, 
5630                                     minorvers, NFSV4EXCH_USEPNFSDS, &dsp,
5631                                     nrp->nr_cred, p);
5632                                 NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
5633                                 if (error == NFSERR_MINORVERMISMATCH)
5634                                         minorvers = NFSV42_MINORVERSION;
5635                         } while (error == NFSERR_MINORVERMISMATCH &&
5636                             firsttry++ == 0);
5637                         if (error != 0)
5638                                 newnfs_disconnect(nrp);
5639                 } else {
                             /* No ExchangeID is done here, so make the nfsclds. */
5640                         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
5641                             M_WAITOK | M_ZERO);
5642                         dsp->nfsclds_flags |= NFSCLDS_DS;
5643                         dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
5644                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5645                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5646                             NULL, MTX_DEF);
5647                 }
5648         }
5649         if (error == 0) {
5650                 dsp->nfsclds_sockp = nrp;
5651                 if (vers == NFS_VER4) {
5652                         NFSLOCKMNT(nmp);
5653                         retv = nfscl_getsameserver(nmp, dsp, &tdsp,
5654                             &sequenceid);
5655                         NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
5656                         if (retv == NFSDSP_USETHISSESSION &&
5657                             nfscl_dssameconn != 0) {
5658                                 NFSLOCKDS(tdsp);
5659                                 tdsp->nfsclds_flags |= NFSCLDS_SAMECONN;
5660                                 NFSUNLOCKDS(tdsp);
5661                                 NFSUNLOCKMNT(nmp);
5662                                 /*
5663                                  * If there is already a session for this
5664                                  * server, use it.
      *
      * The connection and nfsclds just made are
      * discarded and the existing one is returned.
5665                                  */
5666                                 (void)newnfs_disconnect(nrp);
5667                                 nfscl_freenfsclds(dsp);
5668                                 *dspp = tdsp;
5669                                 return (0);
5670                         }
                             /*
                              * When no matching entry was found, use the sequenceid
                              * stored in the new nfsclds.  Otherwise the value set
                              * by nfscl_getsameserver() (or the initial 0) is used.
                              */
5671                         if (retv == NFSDSP_NOTFOUND)
5672                                 sequenceid =
5673                                     dsp->nfsclds_sess.nfsess_sequenceid;
5674                         NFSUNLOCKMNT(nmp);
5675                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
5676                             nrp, dsp, sequenceid, 0, nrp->nr_cred, p);
5677                         NFSCL_DEBUG(3, "DS createsess=%d\n", error);
5678                 }
5679         } else {
                     /*
                      * The connect or ExchangeID failed, so nrp was never hung
                      * off of an nfsclds and must be torn down here.
                      */
5680                 NFSFREECRED(nrp->nr_cred);
5681                 NFSFREEMUTEX(&nrp->nr_mtx);
5682                 free(nrp->nr_nam, M_SONAME);
5683                 free(nrp, M_NFSSOCKREQ);
5684         }
5685         if (error == 0) {
5686                 NFSCL_DEBUG(3, "add DS session\n");
5687                 /*
5688                  * Put it at the end of the list. That way the list
5689                  * is ordered by when the entry was added. This matters
5690                  * since the one done first is the one that should be
5691                  * used for sequencid'ing any subsequent create sessions.
5692                  */
5693                 NFSLOCKMNT(nmp);
5694                 TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
5695                 NFSUNLOCKMNT(nmp);
5696                 *dspp = dsp;
5697         } else if (dsp != NULL) {
                     /*
                      * There is an nfsclds, but the step after it was set up
                      * failed; disconnect and free it.
                      * NOTE(review): nfscl_freenfsclds() presumably also frees
                      * nrp via dsp->nfsclds_sockp - confirm against its
                      * definition.
                      */
5698                 newnfs_disconnect(nrp);
5699                 nfscl_freenfsclds(dsp);
5700         }
5701         return (error);
5702 }
5703
5704 /*
5705  * Do the NFSv4.1 Reclaim Complete.
5706  */
5707 int
5708 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
5709 {
5710         uint32_t *tl;
5711         struct nfsrv_descript nfsd;
5712         struct nfsrv_descript *nd = &nfsd;
5713         int error;
5714
5715         nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0,
5716             0, false);
5717         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5718         *tl = newnfs_false;
5719         nd->nd_flag |= ND_USEGSSNAME;
5720         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5721             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5722         if (error != 0)
5723                 return (error);
5724         error = nd->nd_repstat;
5725         m_freem(nd->nd_mrep);
5726         return (error);
5727 }
5728
5729 /*
5730  * Initialize the slot tables for a session.
5731  */
5732 static void
5733 nfscl_initsessionslots(struct nfsclsession *sep)
5734 {
5735         int i;
5736
5737         for (i = 0; i < NFSV4_CBSLOTS; i++) {
5738                 if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
5739                         m_freem(sep->nfsess_cbslots[i].nfssl_reply);
5740                 NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
5741         }
5742         for (i = 0; i < 64; i++)
5743                 sep->nfsess_slotseq[i] = 0;
5744         sep->nfsess_slots = 0;
5745 }
5746
5747 /*
5748  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
5749  */
5750 int
5751 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5752     uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
5753 {
5754         struct nfsnode *np = VTONFS(vp);
5755         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5756         struct nfscllayout *layp;
5757         struct nfscldevinfo *dip;
5758         struct nfsclflayout *rflp;
5759         struct mbuf *m, *m2;
5760         struct nfsclwritedsdorpc *drpc, *tdrpc;
5761         nfsv4stateid_t stateid;
5762         struct ucred *newcred;
5763         uint64_t lastbyte, len, off, oresid, xfer;
5764         int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo;
5765         void *lckp;
5766         uint8_t *dev;
5767         void *iovbase = NULL;
5768         size_t iovlen = 0;
5769         off_t offs = 0;
5770         ssize_t resid = 0;
5771
5772         if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5773             (np->n_flag & NNOLAYOUT) != 0)
5774                 return (EIO);
5775         /* Now, get a reference cnt on the clientid for this mount. */
5776         if (nfscl_getref(nmp) == 0)
5777                 return (EIO);
5778
5779         /* Find an appropriate stateid. */
5780         newcred = NFSNEWCRED(cred);
5781         error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
5782             rwaccess, 1, newcred, p, &stateid, &lckp);
5783         if (error != 0) {
5784                 NFSFREECRED(newcred);
5785                 nfscl_relref(nmp);
5786                 return (error);
5787         }
5788         /* Search for a layout for this file. */
5789         off = uiop->uio_offset;
5790         layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
5791             np->n_fhp->nfh_len, off, &rflp, &recalled);
5792         if (layp == NULL || rflp == NULL) {
5793                 if (recalled != 0) {
5794                         NFSFREECRED(newcred);
5795                         nfscl_relref(nmp);
5796                         return (EIO);
5797                 }
5798                 if (layp != NULL) {
5799                         nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
5800                         layp = NULL;
5801                 }
5802                 /* Try and get a Layout, if it is supported. */
5803                 if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
5804                     (np->n_flag & NWRITEOPENED) != 0)
5805                         iolaymode = NFSLAYOUTIOMODE_RW;
5806                 else
5807                         iolaymode = NFSLAYOUTIOMODE_READ;
5808                 error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
5809                     NULL, &stateid, off, &layp, newcred, p);
5810                 if (error != 0) {
5811                         NFSLOCKNODE(np);
5812                         np->n_flag |= NNOLAYOUT;
5813                         NFSUNLOCKNODE(np);
5814                         if (lckp != NULL)
5815                                 nfscl_lockderef(lckp);
5816                         NFSFREECRED(newcred);
5817                         if (layp != NULL)
5818                                 nfscl_rellayout(layp, 0);
5819                         nfscl_relref(nmp);
5820                         return (error);
5821                 }
5822         }
5823
5824         /*
5825          * Loop around finding a layout that works for the first part of
5826          * this I/O operation, and then call the function that actually
5827          * does the RPC.
5828          */
5829         eof = 0;
5830         len = (uint64_t)uiop->uio_resid;
5831         while (len > 0 && error == 0 && eof == 0) {
5832                 off = uiop->uio_offset;
5833                 error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
5834                 if (error == 0) {
5835                         oresid = xfer = (uint64_t)uiop->uio_resid;
5836                         if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
5837                                 xfer = rflp->nfsfl_end - rflp->nfsfl_off;
5838                         /*
5839                          * For Flex File layout with mirrored DSs, select one
5840                          * of them at random for reads. For writes and commits,
5841                          * do all mirrors.
5842                          */
5843                         m = NULL;
5844                         tdrpc = drpc = NULL;
5845                         firstmirror = 0;
5846                         mirrorcnt = 1;
5847                         if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
5848                             (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
5849                                 if (rwaccess == NFSV4OPEN_ACCESSREAD) {
5850                                         firstmirror = arc4random() % mirrorcnt;
5851                                         mirrorcnt = firstmirror + 1;
5852                                 } else {
5853                                         if (docommit == 0) {
5854                                                 /*
5855                                                  * Save values, so uiop can be
5856                                                  * rolled back upon a write
5857                                                  * error.
5858                                                  */
5859                                                 offs = uiop->uio_offset;
5860                                                 resid = uiop->uio_resid;
5861                                                 iovbase =
5862                                                     uiop->uio_iov->iov_base;
5863                                                 iovlen = uiop->uio_iov->iov_len;
5864                                                 m = nfsm_uiombuflist(uiop, len,
5865                                                     0);
5866                                         }
5867                                         tdrpc = drpc = malloc(sizeof(*drpc) *
5868                                             (mirrorcnt - 1), M_TEMP, M_WAITOK |
5869                                             M_ZERO);
5870                                 }
5871                         }
5872                         for (i = firstmirror; i < mirrorcnt && error == 0; i++){
5873                                 m2 = NULL;
5874                                 if (m != NULL && i < mirrorcnt - 1)
5875                                         m2 = m_copym(m, 0, M_COPYALL, M_WAITOK);
5876                                 else {
5877                                         m2 = m;
5878                                         m = NULL;
5879                                 }
5880                                 if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0) {
5881                                         dev = rflp->nfsfl_ffm[i].dev;
5882                                         dip = nfscl_getdevinfo(nmp->nm_clp, dev,
5883                                             rflp->nfsfl_ffm[i].devp);
5884                                 } else {
5885                                         dev = rflp->nfsfl_dev;
5886                                         dip = nfscl_getdevinfo(nmp->nm_clp, dev,
5887                                             rflp->nfsfl_devp);
5888                                 }
5889                                 if (dip != NULL) {
5890                                         if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
5891                                             != 0)
5892                                                 error = nfscl_dofflayoutio(vp,
5893                                                     uiop, iomode, must_commit,
5894                                                     &eof, &stateid, rwaccess,
5895                                                     dip, layp, rflp, off, xfer,
5896                                                     i, docommit, m2, tdrpc,
5897                                                     newcred, p);
5898                                         else
5899                                                 error = nfscl_doflayoutio(vp,
5900                                                     uiop, iomode, must_commit,
5901                                                     &eof, &stateid, rwaccess,
5902                                                     dip, layp, rflp, off, xfer,
5903                                                     docommit, newcred, p);
5904                                         nfscl_reldevinfo(dip);
5905                                 } else {
5906                                         if (m2 != NULL)
5907                                                 m_freem(m2);
5908                                         error = EIO;
5909                                 }
5910                                 tdrpc++;
5911                         }
5912                         if (m != NULL)
5913                                 m_freem(m);
5914                         tdrpc = drpc;
5915                         timo = hz / 50;         /* Wait for 20msec. */
5916                         if (timo < 1)
5917                                 timo = 1;
5918                         for (i = firstmirror; i < mirrorcnt - 1 &&
5919                             tdrpc != NULL; i++, tdrpc++) {
5920                                 /*
5921                                  * For the unused drpc entries, both inprog and
5922                                  * err == 0, so this loop won't break.
5923                                  */
5924                                 while (tdrpc->inprog != 0 && tdrpc->done == 0)
5925                                         tsleep(&tdrpc->tsk, PVFS, "clrpcio",
5926                                             timo);
5927                                 if (error == 0 && tdrpc->err != 0)
5928                                         error = tdrpc->err;
5929                         }
5930                         free(drpc, M_TEMP);
5931                         if (error == 0) {
5932                                 if (mirrorcnt > 1 && rwaccess ==
5933                                     NFSV4OPEN_ACCESSWRITE && docommit == 0) {
5934                                         NFSLOCKCLSTATE();
5935                                         layp->nfsly_flags |= NFSLY_WRITTEN;
5936                                         NFSUNLOCKCLSTATE();
5937                                 }
5938                                 lastbyte = off + xfer - 1;
5939                                 NFSLOCKCLSTATE();
5940                                 if (lastbyte > layp->nfsly_lastbyte)
5941                                         layp->nfsly_lastbyte = lastbyte;
5942                                 NFSUNLOCKCLSTATE();
5943                         } else if (error == NFSERR_OPENMODE &&
5944                             rwaccess == NFSV4OPEN_ACCESSREAD) {
5945                                 NFSLOCKMNT(nmp);
5946                                 nmp->nm_state |= NFSSTA_OPENMODE;
5947                                 NFSUNLOCKMNT(nmp);
5948                         } else
5949                                 error = EIO;
5950                         if (error == 0)
5951                                 len -= (oresid - (uint64_t)uiop->uio_resid);
5952                         else if (mirrorcnt > 1 && rwaccess ==
5953                             NFSV4OPEN_ACCESSWRITE && docommit == 0) {
5954                                 /*
5955                                  * In case the rpc gets retried, roll the
5956                                  * uio fields changed by nfsm_uiombuflist()
5957                                  * back.
5958                                  */
5959                                 uiop->uio_offset = offs;
5960                                 uiop->uio_resid = resid;
5961                                 uiop->uio_iov->iov_base = iovbase;
5962                                 uiop->uio_iov->iov_len = iovlen;
5963                         }
5964                 }
5965         }
5966         if (lckp != NULL)
5967                 nfscl_lockderef(lckp);
5968         NFSFREECRED(newcred);
5969         nfscl_rellayout(layp, 0);
5970         nfscl_relref(nmp);
5971         return (error);
5972 }
5973
5974 /*
5975  * Find a file layout that will handle the first bytes of the requested
5976  * range and return the information from it needed to the I/O operation.
5977  */
5978 int
5979 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
5980     struct nfsclflayout **retflpp)
5981 {
5982         struct nfsclflayout *flp, *nflp, *rflp;
5983         uint32_t rw;
5984
5985         rflp = NULL;
5986         rw = rwaccess;
5987         /* For reading, do the Read list first and then the Write list. */
5988         do {
5989                 if (rw == NFSV4OPEN_ACCESSREAD)
5990                         flp = LIST_FIRST(&lyp->nfsly_flayread);
5991                 else
5992                         flp = LIST_FIRST(&lyp->nfsly_flayrw);
5993                 while (flp != NULL) {
5994                         nflp = LIST_NEXT(flp, nfsfl_list);
5995                         if (flp->nfsfl_off > off)
5996                                 break;
5997                         if (flp->nfsfl_end > off &&
5998                             (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
5999                                 rflp = flp;
6000                         flp = nflp;
6001                 }
6002                 if (rw == NFSV4OPEN_ACCESSREAD)
6003                         rw = NFSV4OPEN_ACCESSWRITE;
6004                 else
6005                         rw = 0;
6006         } while (rw != 0);
6007         if (rflp != NULL) {
6008                 /* This one covers the most bytes starting at off. */
6009                 *retflpp = rflp;
6010                 return (0);
6011         }
6012         return (EIO);
6013 }
6014
6015 /*
6016  * Do I/O using an NFSv4.1 or NFSv4.2 file layout.
6017  */
6018 static int
6019 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6020     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6021     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6022     uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
6023 {
6024         uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
6025         int commit_thru_mds, error, stripe_index, stripe_pos, minorvers;
6026         struct nfsnode *np;
6027         struct nfsfh *fhp;
6028         struct nfsclds **dspp;
6029
6030         np = VTONFS(vp);
6031         rel_off = off - flp->nfsfl_patoff;
6032         stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff;
6033         stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
6034             dp->nfsdi_stripecnt;
6035         transfer = stripe_unit_size - (rel_off % stripe_unit_size);
6036         error = 0;
6037
6038         /* Loop around, doing I/O for each stripe unit. */
6039         while (len > 0 && error == 0) {
6040                 stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
6041                 dspp = nfsfldi_addr(dp, stripe_index);
6042                 if (((*dspp)->nfsclds_flags & NFSCLDS_MINORV2) != 0)
6043                         minorvers = NFSV42_MINORVERSION;
6044                 else
6045                         minorvers = NFSV41_MINORVERSION;
6046                 if (len > transfer && docommit == 0)
6047                         xfer = transfer;
6048                 else
6049                         xfer = len;
6050                 if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
6051                         /* Dense layout. */
6052                         if (stripe_pos >= flp->nfsfl_fhcnt)
6053                                 return (EIO);
6054                         fhp = flp->nfsfl_fh[stripe_pos];
6055                         io_off = (rel_off / (stripe_unit_size *
6056                             dp->nfsdi_stripecnt)) * stripe_unit_size +
6057                             rel_off % stripe_unit_size;
6058                 } else {
6059                         /* Sparse layout. */
6060                         if (flp->nfsfl_fhcnt > 1) {
6061                                 if (stripe_index >= flp->nfsfl_fhcnt)
6062                                         return (EIO);
6063                                 fhp = flp->nfsfl_fh[stripe_index];
6064                         } else if (flp->nfsfl_fhcnt == 1)
6065                                 fhp = flp->nfsfl_fh[0];
6066                         else
6067                                 fhp = np->n_fhp;
6068                         io_off = off;
6069                 }
6070                 if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
6071                         commit_thru_mds = 1;
6072                         if (docommit != 0)
6073                                 error = EIO;
6074                 } else {
6075                         commit_thru_mds = 0;
6076                         NFSLOCKNODE(np);
6077                         np->n_flag |= NDSCOMMIT;
6078                         NFSUNLOCKNODE(np);
6079                 }
6080                 if (docommit != 0) {
6081                         if (error == 0)
6082                                 error = nfsrpc_commitds(vp, io_off, xfer,
6083                                     *dspp, fhp, NFS_VER4, minorvers, cred, p);
6084                         if (error == 0) {
6085                                 /*
6086                                  * Set both eof and uio_resid = 0 to end any
6087                                  * loops.
6088                                  */
6089                                 *eofp = 1;
6090                                 uiop->uio_resid = 0;
6091                         } else {
6092                                 NFSLOCKNODE(np);
6093                                 np->n_flag &= ~NDSCOMMIT;
6094                                 NFSUNLOCKNODE(np);
6095                         }
6096                 } else if (rwflag == NFSV4OPEN_ACCESSREAD)
6097                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6098                             io_off, xfer, fhp, 0, NFS_VER4, minorvers, cred, p);
6099                 else {
6100                         error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
6101                             stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
6102                             0, NFS_VER4, minorvers, cred, p);
6103                         if (error == 0) {
6104                                 NFSLOCKCLSTATE();
6105                                 lyp->nfsly_flags |= NFSLY_WRITTEN;
6106                                 NFSUNLOCKCLSTATE();
6107                         }
6108                 }
6109                 if (error == 0) {
6110                         transfer = stripe_unit_size;
6111                         stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
6112                         len -= xfer;
6113                         off += xfer;
6114                 }
6115         }
6116         return (error);
6117 }
6118
6119 /*
6120  * Do I/O using an NFSv4.1 flex file layout.
6121  */
6122 static int
6123 nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6124     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6125     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6126     uint64_t len, int mirror, int docommit, struct mbuf *mp,
6127     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6128 {
6129         uint64_t xfer;
6130         int error;
6131         struct nfsnode *np;
6132         struct nfsfh *fhp;
6133         struct nfsclds **dspp;
6134         struct ucred *tcred;
6135         struct mbuf *m, *m2;
6136         uint32_t copylen;
6137
6138         np = VTONFS(vp);
6139         error = 0;
6140         NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
6141             (uintmax_t)len);
6142         /* Loop around, doing I/O for each stripe unit. */
6143         while (len > 0 && error == 0) {
6144                 dspp = nfsfldi_addr(dp, 0);
6145                 fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
6146                 stateidp = &flp->nfsfl_ffm[mirror].st;
6147                 NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
6148                     mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
6149                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
6150                         tcred = NFSNEWCRED(cred);
6151                         tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
6152                         tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
6153                         tcred->cr_ngroups = 1;
6154                 } else
6155                         tcred = cred;
6156                 if (rwflag == NFSV4OPEN_ACCESSREAD)
6157                         copylen = dp->nfsdi_rsize;
6158                 else {
6159                         copylen = dp->nfsdi_wsize;
6160                         if (len > copylen && mp != NULL) {
6161                                 /*
6162                                  * When a mirrored configuration needs to do
6163                                  * multiple writes to each mirror, all writes
6164                                  * except the last one must be a multiple of
6165                                  * 4 bytes.  This is required so that the XDR
6166                                  * does not need padding.
6167                                  * If possible, clip the size to an exact
6168                                  * multiple of the mbuf length, so that the
6169                                  * split will be on an mbuf boundary.
6170                                  */
6171                                 copylen &= 0xfffffffc;
6172                                 if (copylen > mp->m_len)
6173                                         copylen = copylen / mp->m_len *
6174                                             mp->m_len;
6175                         }
6176                 }
6177                 NFSLOCKNODE(np);
6178                 np->n_flag |= NDSCOMMIT;
6179                 NFSUNLOCKNODE(np);
6180                 if (len > copylen && docommit == 0)
6181                         xfer = copylen;
6182                 else
6183                         xfer = len;
6184                 if (docommit != 0) {
6185                         if (error == 0) {
6186                                 /*
6187                                  * Do last mirrored DS commit with this thread.
6188                                  */
6189                                 if (mirror < flp->nfsfl_mirrorcnt - 1)
6190                                         error = nfsio_commitds(vp, off, xfer,
6191                                             *dspp, fhp, dp->nfsdi_vers,
6192                                             dp->nfsdi_minorvers, drpc, tcred,
6193                                             p);
6194                                 else
6195                                         error = nfsrpc_commitds(vp, off, xfer,
6196                                             *dspp, fhp, dp->nfsdi_vers,
6197                                             dp->nfsdi_minorvers, tcred, p);
6198                                 NFSCL_DEBUG(4, "commitds=%d\n", error);
6199                                 if (error != 0 && error != EACCES && error !=
6200                                     ESTALE) {
6201                                         NFSCL_DEBUG(4,
6202                                             "DS layreterr for commit\n");
6203                                         nfscl_dserr(NFSV4OP_COMMIT, error, dp,
6204                                             lyp, *dspp);
6205                                 }
6206                         }
6207                         NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error);
6208                         if (error == 0) {
6209                                 /*
6210                                  * Set both eof and uio_resid = 0 to end any
6211                                  * loops.
6212                                  */
6213                                 *eofp = 1;
6214                                 uiop->uio_resid = 0;
6215                         } else {
6216                                 NFSLOCKNODE(np);
6217                                 np->n_flag &= ~NDSCOMMIT;
6218                                 NFSUNLOCKNODE(np);
6219                         }
6220                 } else if (rwflag == NFSV4OPEN_ACCESSREAD) {
6221                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6222                             off, xfer, fhp, 1, dp->nfsdi_vers,
6223                             dp->nfsdi_minorvers, tcred, p);
6224                         NFSCL_DEBUG(4, "readds=%d\n", error);
6225                         if (error != 0 && error != EACCES && error != ESTALE) {
6226                                 NFSCL_DEBUG(4, "DS layreterr for read\n");
6227                                 nfscl_dserr(NFSV4OP_READ, error, dp, lyp,
6228                                     *dspp);
6229                         }
6230                 } else {
6231                         if (flp->nfsfl_mirrorcnt == 1) {
6232                                 error = nfsrpc_writeds(vp, uiop, iomode,
6233                                     must_commit, stateidp, *dspp, off, xfer,
6234                                     fhp, 0, 1, dp->nfsdi_vers,
6235                                     dp->nfsdi_minorvers, tcred, p);
6236                                 if (error == 0) {
6237                                         NFSLOCKCLSTATE();
6238                                         lyp->nfsly_flags |= NFSLY_WRITTEN;
6239                                         NFSUNLOCKCLSTATE();
6240                                 }
6241                         } else {
6242                                 m = mp;
6243                                 if (xfer < len) {
6244                                         /* The mbuf list must be split. */
6245                                         m2 = nfsm_split(mp, xfer);
6246                                         if (m2 != NULL)
6247                                                 mp = m2;
6248                                         else {
6249                                                 m_freem(mp);
6250                                                 error = EIO;
6251                                         }
6252                                 }
6253                                 NFSCL_DEBUG(4, "mcopy len=%jd xfer=%jd\n",
6254                                     (uintmax_t)len, (uintmax_t)xfer);
6255                                 /*
6256                                  * Do last write to a mirrored DS with this
6257                                  * thread.
6258                                  */
6259                                 if (error == 0) {
6260                                         if (mirror < flp->nfsfl_mirrorcnt - 1)
6261                                                 error = nfsio_writedsmir(vp,
6262                                                     iomode, must_commit,
6263                                                     stateidp, *dspp, off,
6264                                                     xfer, fhp, m,
6265                                                     dp->nfsdi_vers,
6266                                                     dp->nfsdi_minorvers, drpc,
6267                                                     tcred, p);
6268                                         else
6269                                                 error = nfsrpc_writedsmir(vp,
6270                                                     iomode, must_commit,
6271                                                     stateidp, *dspp, off,
6272                                                     xfer, fhp, m,
6273                                                     dp->nfsdi_vers,
6274                                                     dp->nfsdi_minorvers, tcred,
6275                                                     p);
6276                                 }
6277                                 NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error);
6278                                 if (error != 0 && error != EACCES && error !=
6279                                     ESTALE) {
6280                                         NFSCL_DEBUG(4,
6281                                             "DS layreterr for write\n");
6282                                         nfscl_dserr(NFSV4OP_WRITE, error, dp,
6283                                             lyp, *dspp);
6284                                 }
6285                         }
6286                 }
6287                 NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
6288                 if (error == 0) {
6289                         len -= xfer;
6290                         off += xfer;
6291                 }
6292                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
6293                         NFSFREECRED(tcred);
6294         }
6295         NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
6296         return (error);
6297 }
6298
6299 /*
6300  * The actual read RPC done to a DS.
6301  */
6302 static int
6303 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
6304     struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
6305     int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
6306 {
6307         uint32_t *tl;
6308         int attrflag, error, retlen;
6309         struct nfsrv_descript nfsd;
6310         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6311         struct nfsrv_descript *nd = &nfsd;
6312         struct nfssockreq *nrp;
6313         struct nfsvattr na;
6314
6315         nd->nd_mrep = NULL;
6316         if (vers == 0 || vers == NFS_VER4) {
6317                 nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
6318                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6319                     false);
6320                 vers = NFS_VER4;
6321                 NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
6322                 if (flex != 0)
6323                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6324                 else
6325                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6326         } else {
6327                 nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
6328                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6329                     false);
6330                 NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
6331         }
6332         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
6333         txdr_hyper(io_off, tl);
6334         *(tl + 2) = txdr_unsigned(len);
6335         nrp = dsp->nfsclds_sockp;
6336         NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
6337         if (nrp == NULL)
6338                 /* If NULL, use the MDS socket. */
6339                 nrp = &nmp->nm_sockreq;
6340         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6341             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6342         NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
6343             error);
6344         if (error != 0)
6345                 return (error);
6346         if (vers == NFS_VER3) {
6347                 error = nfscl_postop_attr(nd, &na, &attrflag, NULL);
6348                 NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
6349                 if (error != 0)
6350                         goto nfsmout;
6351         }
6352         if (nd->nd_repstat != 0) {
6353                 error = nd->nd_repstat;
6354                 goto nfsmout;
6355         }
6356         if (vers == NFS_VER3) {
6357                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6358                 *eofp = fxdr_unsigned(int, *(tl + 1));
6359         } else {
6360                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6361                 *eofp = fxdr_unsigned(int, *tl);
6362         }
6363         NFSM_STRSIZ(retlen, len);
6364         NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
6365         error = nfsm_mbufuio(nd, uiop, retlen);
6366 nfsmout:
6367         if (nd->nd_mrep != NULL)
6368                 m_freem(nd->nd_mrep);
6369         return (error);
6370 }
6371
6372 /*
6373  * The actual write RPC done to a DS.
6374  */
6375 static int
6376 nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6377     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6378     struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
6379     struct ucred *cred, NFSPROC_T *p)
6380 {
6381         uint32_t *tl;
6382         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6383         int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
6384         int32_t backup;
6385         struct nfsrv_descript nfsd;
6386         struct nfsrv_descript *nd = &nfsd;
6387         struct nfssockreq *nrp;
6388         struct nfsvattr na;
6389
6390         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
6391         nd->nd_mrep = NULL;
6392         if (vers == 0 || vers == NFS_VER4) {
6393                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6394                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6395                     false);
6396                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
6397                 vers = NFS_VER4;
6398                 if (flex != 0)
6399                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6400                 else
6401                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6402                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6403         } else {
6404                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6405                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6406                     false);
6407                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
6408                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6409         }
6410         txdr_hyper(io_off, tl);
6411         tl += 2;
6412         if (vers == NFS_VER3)
6413                 *tl++ = txdr_unsigned(len);
6414         *tl++ = txdr_unsigned(*iomode);
6415         *tl = txdr_unsigned(len);
6416         nfsm_uiombuf(nd, uiop, len);
6417         nrp = dsp->nfsclds_sockp;
6418         if (nrp == NULL)
6419                 /* If NULL, use the MDS socket. */
6420                 nrp = &nmp->nm_sockreq;
6421         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6422             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6423         NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
6424             nd->nd_repstat);
6425         if (error != 0)
6426                 return (error);
6427         if (nd->nd_repstat != 0) {
6428                 /*
6429                  * In case the rpc gets retried, roll
6430                  * the uio fileds changed by nfsm_uiombuf()
6431                  * back.
6432                  */
6433                 uiop->uio_offset -= len;
6434                 uiop->uio_resid += len;
6435                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - len;
6436                 uiop->uio_iov->iov_len += len;
6437                 error = nd->nd_repstat;
6438         } else {
6439                 if (vers == NFS_VER3) {
6440                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6441                             NULL);
6442                         NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
6443                         if (error != 0)
6444                                 goto nfsmout;
6445                 }
6446                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6447                 rlen = fxdr_unsigned(int, *tl++);
6448                 NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
6449                 if (rlen == 0) {
6450                         error = NFSERR_IO;
6451                         goto nfsmout;
6452                 } else if (rlen < len) {
6453                         backup = len - rlen;
6454                         uiop->uio_iov->iov_base =
6455                             (char *)uiop->uio_iov->iov_base - backup;
6456                         uiop->uio_iov->iov_len += backup;
6457                         uiop->uio_offset -= backup;
6458                         uiop->uio_resid += backup;
6459                         len = rlen;
6460                 }
6461                 commit = fxdr_unsigned(int, *tl++);
6462
6463                 /*
6464                  * Return the lowest commitment level
6465                  * obtained by any of the RPCs.
6466                  */
6467                 if (committed == NFSWRITE_FILESYNC)
6468                         committed = commit;
6469                 else if (committed == NFSWRITE_DATASYNC &&
6470                     commit == NFSWRITE_UNSTABLE)
6471                         committed = commit;
6472                 if (commit_thru_mds != 0) {
6473                         NFSLOCKMNT(nmp);
6474                         if (!NFSHASWRITEVERF(nmp)) {
6475                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6476                                 NFSSETWRITEVERF(nmp);
6477                         } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
6478                                 *must_commit = 1;
6479                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6480                         }
6481                         NFSUNLOCKMNT(nmp);
6482                 } else {
6483                         NFSLOCKDS(dsp);
6484                         if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6485                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6486                                 dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6487                         } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6488                                 *must_commit = 1;
6489                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6490                         }
6491                         NFSUNLOCKDS(dsp);
6492                 }
6493         }
6494 nfsmout:
6495         if (nd->nd_mrep != NULL)
6496                 m_freem(nd->nd_mrep);
6497         *iomode = committed;
6498         if (nd->nd_repstat != 0 && error == 0)
6499                 error = nd->nd_repstat;
6500         return (error);
6501 }
6502
6503 /*
6504  * The actual write RPC done to a DS.
6505  * This variant is called from a separate kernel process for mirrors.
6506  * Any short write is considered an IO error.
6507  */
6508 static int
6509 nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6510     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6511     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6512     struct ucred *cred, NFSPROC_T *p)
6513 {
6514         uint32_t *tl;
6515         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6516         int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
6517         struct nfsrv_descript nfsd;
6518         struct nfsrv_descript *nd = &nfsd;
6519         struct nfssockreq *nrp;
6520         struct nfsvattr na;
6521
6522         nd->nd_mrep = NULL;
6523         if (vers == 0 || vers == NFS_VER4) {
6524                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6525                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6526                     false);
6527                 vers = NFS_VER4;
6528                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
6529                     minorvers);
6530                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6531                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6532         } else {
6533                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6534                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6535                     false);
6536                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
6537                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6538         }
6539         txdr_hyper(io_off, tl);
6540         tl += 2;
6541         if (vers == NFS_VER3)
6542                 *tl++ = txdr_unsigned(len);
6543         *tl++ = txdr_unsigned(*iomode);
6544         *tl = txdr_unsigned(len);
6545         if (len > 0) {
6546                 /* Put data in mbuf chain. */
6547                 nd->nd_mb->m_next = m;
6548         }
6549         nrp = dsp->nfsclds_sockp;
6550         if (nrp == NULL)
6551                 /* If NULL, use the MDS socket. */
6552                 nrp = &nmp->nm_sockreq;
6553         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6554             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6555         NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
6556             nd->nd_repstat);
6557         if (error != 0)
6558                 return (error);
6559         if (nd->nd_repstat != 0)
6560                 error = nd->nd_repstat;
6561         else {
6562                 if (vers == NFS_VER3) {
6563                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6564                             NULL);
6565                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
6566                             error);
6567                         if (error != 0)
6568                                 goto nfsmout;
6569                 }
6570                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6571                 rlen = fxdr_unsigned(int, *tl++);
6572                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
6573                     rlen);
6574                 if (rlen != len) {
6575                         error = NFSERR_IO;
6576                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
6577                             len, rlen);
6578                         goto nfsmout;
6579                 }
6580                 commit = fxdr_unsigned(int, *tl++);
6581
6582                 /*
6583                  * Return the lowest commitment level
6584                  * obtained by any of the RPCs.
6585                  */
6586                 if (committed == NFSWRITE_FILESYNC)
6587                         committed = commit;
6588                 else if (committed == NFSWRITE_DATASYNC &&
6589                     commit == NFSWRITE_UNSTABLE)
6590                         committed = commit;
6591                 NFSLOCKDS(dsp);
6592                 if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6593                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6594                         dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6595                 } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6596                         *must_commit = 1;
6597                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6598                 }
6599                 NFSUNLOCKDS(dsp);
6600         }
6601 nfsmout:
6602         if (nd->nd_mrep != NULL)
6603                 m_freem(nd->nd_mrep);
6604         *iomode = committed;
6605         if (nd->nd_repstat != 0 && error == 0)
6606                 error = nd->nd_repstat;
6607         return (error);
6608 }
6609
6610 /*
6611  * Start up the thread that will execute nfsrpc_writedsmir().
6612  */
6613 static void
6614 start_writedsmir(void *arg, int pending)
6615 {
6616         struct nfsclwritedsdorpc *drpc;
6617
6618         drpc = (struct nfsclwritedsdorpc *)arg;
6619         drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode,
6620             &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len,
6621             drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred,
6622             drpc->p);
6623         drpc->done = 1;
6624         NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err);
6625 }
6626
6627 /*
6628  * Set up the write DS mirror call for the pNFS I/O thread.
6629  */
6630 static int
6631 nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6632     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len,
6633     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6634     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6635 {
6636         int error, ret;
6637
6638         error = 0;
6639         drpc->done = 0;
6640         drpc->vp = vp;
6641         drpc->iomode = *iomode;
6642         drpc->must_commit = *must_commit;
6643         drpc->stateidp = stateidp;
6644         drpc->dsp = dsp;
6645         drpc->off = off;
6646         drpc->len = len;
6647         drpc->fhp = fhp;
6648         drpc->m = m;
6649         drpc->vers = vers;
6650         drpc->minorvers = minorvers;
6651         drpc->cred = cred;
6652         drpc->p = p;
6653         drpc->inprog = 0;
6654         ret = EIO;
6655         if (nfs_pnfsiothreads != 0) {
6656                 ret = nfs_pnfsio(start_writedsmir, drpc);
6657                 NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret);
6658         }
6659         if (ret != 0)
6660                 error = nfsrpc_writedsmir(vp, iomode, must_commit, stateidp,
6661                     dsp, off, len, fhp, m, vers, minorvers, cred, p);
6662         NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error);
6663         return (error);
6664 }
6665
6666 /*
6667  * Free up the nfsclds structure.
6668  */
6669 void
6670 nfscl_freenfsclds(struct nfsclds *dsp)
6671 {
6672         int i;
6673
6674         if (dsp == NULL)
6675                 return;
6676         if (dsp->nfsclds_sockp != NULL) {
6677                 NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
6678                 NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
6679                 free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
6680                 free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
6681         }
6682         NFSFREEMUTEX(&dsp->nfsclds_mtx);
6683         NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
6684         for (i = 0; i < NFSV4_CBSLOTS; i++) {
6685                 if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
6686                         m_freem(
6687                             dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
6688         }
6689         free(dsp, M_NFSCLDS);
6690 }
6691
6692 static enum nfsclds_state
6693 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
6694     struct nfsclds **retdspp, uint32_t *sequencep)
6695 {
6696         struct nfsclds *dsp;
6697         int fndseq;
6698
6699         /*
6700          * Search the list of nfsclds structures for one with the same
6701          * server.
6702          */
6703         fndseq = 0;
6704         TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
6705                 if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
6706                     dsp->nfsclds_servownlen != 0 &&
6707                     !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
6708                     dsp->nfsclds_servownlen) &&
6709                     dsp->nfsclds_sess.nfsess_defunct == 0) {
6710                         NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
6711                             TAILQ_FIRST(&nmp->nm_sess), dsp,
6712                             dsp->nfsclds_flags);
6713                         if (fndseq == 0) {
6714                                 /* Get sequenceid# from first entry. */
6715                                 *sequencep =
6716                                     dsp->nfsclds_sess.nfsess_sequenceid;
6717                                 fndseq = 1;
6718                         }
6719                         /* Server major id matches. */
6720                         if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
6721                                 *retdspp = dsp;
6722                                 return (NFSDSP_USETHISSESSION);
6723                         }
6724
6725                 }
6726         }
6727         if (fndseq != 0)
6728                 return (NFSDSP_SEQTHISSESSION);
6729         return (NFSDSP_NOTFOUND);
6730 }
6731
6732 /*
6733  * NFS commit rpc to a NFSv4.1 DS.
6734  */
6735 static int
6736 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
6737     struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
6738     NFSPROC_T *p)
6739 {
6740         uint32_t *tl;
6741         struct nfsrv_descript nfsd, *nd = &nfsd;
6742         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6743         struct nfssockreq *nrp;
6744         struct nfsvattr na;
6745         int attrflag, error;
6746         
6747         nd->nd_mrep = NULL;
6748         if (vers == 0 || vers == NFS_VER4) {
6749                 nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,
6750                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6751                     false);
6752                 vers = NFS_VER4;
6753         } else
6754                 nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh,
6755                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6756                     false);
6757         NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers,
6758             minorvers);
6759         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
6760         txdr_hyper(offset, tl);
6761         tl += 2;
6762         *tl = txdr_unsigned(cnt);
6763         nrp = dsp->nfsclds_sockp;
6764         if (nrp == NULL)
6765                 /* If NULL, use the MDS socket. */
6766                 nrp = &nmp->nm_sockreq;
6767         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6768             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6769         NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error,
6770             nd->nd_repstat);
6771         if (error != 0)
6772                 return (error);
6773         if (nd->nd_repstat == 0) {
6774                 if (vers == NFS_VER3) {
6775                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6776                             NULL);
6777                         NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error);
6778                         if (error != 0)
6779                                 goto nfsmout;
6780                 }
6781                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
6782                 NFSLOCKDS(dsp);
6783                 if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6784                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6785                         error = NFSERR_STALEWRITEVERF;
6786                 }
6787                 NFSUNLOCKDS(dsp);
6788         }
6789 nfsmout:
6790         if (error == 0 && nd->nd_repstat != 0)
6791                 error = nd->nd_repstat;
6792         m_freem(nd->nd_mrep);
6793         return (error);
6794 }
6795
6796 /*
6797  * Start up the thread that will execute nfsrpc_commitds().
6798  */
6799 static void
6800 start_commitds(void *arg, int pending)
6801 {
6802         struct nfsclwritedsdorpc *drpc;
6803
6804         drpc = (struct nfsclwritedsdorpc *)arg;
6805         drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len,
6806             drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred,
6807             drpc->p);
6808         drpc->done = 1;
6809         NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err);
6810 }
6811
6812 /*
6813  * Set up the commit DS mirror call for the pNFS I/O thread.
6814  */
6815 static int
6816 nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
6817     struct nfsfh *fhp, int vers, int minorvers,
6818     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6819 {
6820         int error, ret;
6821
6822         error = 0;
6823         drpc->done = 0;
6824         drpc->vp = vp;
6825         drpc->off = offset;
6826         drpc->len = cnt;
6827         drpc->dsp = dsp;
6828         drpc->fhp = fhp;
6829         drpc->vers = vers;
6830         drpc->minorvers = minorvers;
6831         drpc->cred = cred;
6832         drpc->p = p;
6833         drpc->inprog = 0;
6834         ret = EIO;
6835         if (nfs_pnfsiothreads != 0) {
6836                 ret = nfs_pnfsio(start_commitds, drpc);
6837                 NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret);
6838         }
6839         if (ret != 0)
6840                 error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers,
6841                     minorvers, cred, p);
6842         NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error);
6843         return (error);
6844 }
6845
6846 /*
6847  * NFS Advise rpc
6848  */
6849 int
6850 nfsrpc_advise(vnode_t vp, off_t offset, uint64_t cnt, int advise,
6851     struct ucred *cred, NFSPROC_T *p)
6852 {
6853         u_int32_t *tl;
6854         struct nfsrv_descript nfsd, *nd = &nfsd;
6855         nfsattrbit_t hints;
6856         int error;
6857         
6858         NFSZERO_ATTRBIT(&hints);
6859         if (advise == POSIX_FADV_WILLNEED)
6860                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
6861         else if (advise == POSIX_FADV_DONTNEED)
6862                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
6863         else
6864                 return (0);
6865         NFSCL_REQSTART(nd, NFSPROC_IOADVISE, vp);
6866         nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
6867         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
6868         txdr_hyper(offset, tl);
6869         tl += 2;
6870         txdr_hyper(cnt, tl);
6871         nfsrv_putattrbit(nd, &hints);
6872         error = nfscl_request(nd, vp, p, cred, NULL);
6873         if (error != 0)
6874                 return (error);
6875         if (nd->nd_repstat != 0)
6876                 error = nd->nd_repstat;
6877         m_freem(nd->nd_mrep);
6878         return (error);
6879 }
6880
6881 #ifdef notyet
6882 /*
6883  * NFS advise rpc to a NFSv4.2 DS.
6884  */
6885 static int
6886 nfsrpc_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
6887     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
6888     struct ucred *cred, NFSPROC_T *p)
6889 {
6890         uint32_t *tl;
6891         struct nfsrv_descript nfsd, *nd = &nfsd;
6892         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6893         struct nfssockreq *nrp;
6894         nfsattrbit_t hints;
6895         int error;
6896         
6897         /* For NFS DSs prior to NFSv4.2, just return OK. */
6898         if (vers == NFS_VER3 || minorversion < NFSV42_MINORVERSION)
6899                 return (0);
6900         NFSZERO_ATTRBIT(&hints);
6901         if (advise == POSIX_FADV_WILLNEED)
6902                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
6903         else if (advise == POSIX_FADV_DONTNEED)
6904                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
6905         else
6906                 return (0);
6907         nd->nd_mrep = NULL;
6908         nfscl_reqstart(nd, NFSPROC_IOADVISEDS, nmp, fhp->nfh_fh,
6909             fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers, false);
6910         vers = NFS_VER4;
6911         NFSCL_DEBUG(4, "nfsrpc_adviseds: vers=%d minvers=%d\n", vers,
6912             minorvers);
6913         nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
6914         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
6915         txdr_hyper(offset, tl);
6916         tl += 2;
6917         *tl = txdr_unsigned(cnt);
6918         nfsrv_putattrbit(nd, &hints);
6919         nrp = dsp->nfsclds_sockp;
6920         if (nrp == NULL)
6921                 /* If NULL, use the MDS socket. */
6922                 nrp = &nmp->nm_sockreq;
6923         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6924             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6925         NFSCL_DEBUG(4, "nfsrpc_adviseds: err=%d stat=%d\n", error,
6926             nd->nd_repstat);
6927         if (error != 0)
6928                 return (error);
6929         if (nd->nd_repstat != 0)
6930                 error = nd->nd_repstat;
6931         m_freem(nd->nd_mrep);
6932         return (error);
6933 }
6934
6935 /*
6936  * Start up the thread that will execute nfsrpc_commitds().
6937  */
6938 static void
6939 start_adviseds(void *arg, int pending)
6940 {
6941         struct nfsclwritedsdorpc *drpc;
6942
6943         drpc = (struct nfsclwritedsdorpc *)arg;
6944         drpc->err = nfsrpc_adviseds(drpc->vp, drpc->off, drpc->len,
6945             drpc->advise, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers,
6946             drpc->cred, drpc->p);
6947         drpc->done = 1;
6948         NFSCL_DEBUG(4, "start_adviseds: err=%d\n", drpc->err);
6949 }
6950
6951 /*
6952  * Set up the commit DS mirror call for the pNFS I/O thread.
6953  */
6954 static int
6955 nfsio_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
6956     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
6957     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6958 {
6959         int error, ret;
6960
6961         error = 0;
6962         drpc->done = 0;
6963         drpc->vp = vp;
6964         drpc->off = offset;
6965         drpc->len = cnt;
6966         drpc->advise = advise;
6967         drpc->dsp = dsp;
6968         drpc->fhp = fhp;
6969         drpc->vers = vers;
6970         drpc->minorvers = minorvers;
6971         drpc->cred = cred;
6972         drpc->p = p;
6973         drpc->inprog = 0;
6974         ret = EIO;
6975         if (nfs_pnfsiothreads != 0) {
6976                 ret = nfs_pnfsio(start_adviseds, drpc);
6977                 NFSCL_DEBUG(4, "nfsio_adviseds: nfs_pnfsio=%d\n", ret);
6978         }
6979         if (ret != 0)
6980                 error = nfsrpc_adviseds(vp, offset, cnt, advise, dsp, fhp, vers,
6981                     minorvers, cred, p);
6982         NFSCL_DEBUG(4, "nfsio_adviseds: error=%d\n", error);
6983         return (error);
6984 }
6985 #endif  /* notyet */
6986
6987 /*
6988  * Do the Allocate operation, retrying for recovery.
6989  */
6990 int
6991 nfsrpc_allocate(vnode_t vp, off_t off, off_t len, struct nfsvattr *nap,
6992     int *attrflagp, struct ucred *cred, NFSPROC_T *p, void *stuff)
6993 {
6994         int error, expireret = 0, retrycnt, nostateid;
6995         uint32_t clidrev = 0;
6996         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6997         struct nfsfh *nfhp = NULL;
6998         nfsv4stateid_t stateid;
6999         off_t tmp_off;
7000         void *lckp;
7001
7002         if (len < 0)
7003                 return (EINVAL);
7004         if (len == 0)
7005                 return (0);
7006         tmp_off = off + len;
7007         NFSLOCKMNT(nmp);
7008         if (tmp_off > nmp->nm_maxfilesize || tmp_off < off) {
7009                 NFSUNLOCKMNT(nmp);
7010                 return (EFBIG);
7011         }
7012         if (nmp->nm_clp != NULL)
7013                 clidrev = nmp->nm_clp->nfsc_clientidrev;
7014         NFSUNLOCKMNT(nmp);
7015         nfhp = VTONFS(vp)->n_fhp;
7016         retrycnt = 0;
7017         do {
7018                 lckp = NULL;
7019                 nostateid = 0;
7020                 nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
7021                     NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
7022                 if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
7023                     stateid.other[2] == 0) {
7024                         nostateid = 1;
7025                         NFSCL_DEBUG(1, "stateid0 in allocate\n");
7026                 }
7027
7028                 /*
7029                  * Not finding a stateid should probably never happen,
7030                  * but just return an error for this case.
7031                  */
7032                 if (nostateid != 0)
7033                         error = EIO;
7034                 else
7035                         error = nfsrpc_allocaterpc(vp, off, len, &stateid,
7036                             nap, attrflagp, cred, p, stuff);
7037                 if (error == NFSERR_STALESTATEID)
7038                         nfscl_initiate_recovery(nmp->nm_clp);
7039                 if (lckp != NULL)
7040                         nfscl_lockderef(lckp);
7041                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
7042                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
7043                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
7044                         (void) nfs_catnap(PZERO, error, "nfs_allocate");
7045                 } else if ((error == NFSERR_EXPIRED ||
7046                     error == NFSERR_BADSTATEID) && clidrev != 0) {
7047                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
7048                 }
7049                 retrycnt++;
7050         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
7051             error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
7052             error == NFSERR_STALEDONTRECOVER ||
7053             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
7054             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
7055              expireret == 0 && clidrev != 0 && retrycnt < 4));
7056         if (error != 0 && retrycnt >= 4)
7057                 error = EIO;
7058         return (error);
7059 }
7060
7061 /*
7062  * The allocate RPC.
7063  */
7064 static int
7065 nfsrpc_allocaterpc(vnode_t vp, off_t off, off_t len, nfsv4stateid_t *stateidp,
7066     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p,
7067     void *stuff)
7068 {
7069         uint32_t *tl;
7070         int error;
7071         struct nfsrv_descript nfsd;
7072         struct nfsrv_descript *nd = &nfsd;
7073         nfsattrbit_t attrbits;
7074
7075         *attrflagp = 0;
7076         NFSCL_REQSTART(nd, NFSPROC_ALLOCATE, vp);
7077         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
7078         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
7079         txdr_hyper(off, tl); tl += 2;
7080         txdr_hyper(len, tl); tl += 2;
7081         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7082         NFSGETATTR_ATTRBIT(&attrbits);
7083         nfsrv_putattrbit(nd, &attrbits);
7084         error = nfscl_request(nd, vp, p, cred, stuff);
7085         if (error != 0)
7086                 return (error);
7087         if (nd->nd_repstat == 0) {
7088                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7089                 error = nfsm_loadattr(nd, nap);
7090                 if (error == 0)
7091                         *attrflagp = NFS_LATTR_NOSHRINK;
7092         } else
7093                 error = nd->nd_repstat;
7094 nfsmout:
7095         m_freem(nd->nd_mrep);
7096         return (error);
7097 }
7098
7099 /*
7100  * Set up the XDR arguments for the LayoutGet operation.
7101  */
7102 static void
7103 nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
7104     uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype,
7105     int layoutlen, int usecurstateid)
7106 {
7107         uint32_t *tl;
7108
7109         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
7110             NFSX_STATEID);
7111         *tl++ = newnfs_false;           /* Don't signal availability. */
7112         *tl++ = txdr_unsigned(layouttype);
7113         *tl++ = txdr_unsigned(iomode);
7114         txdr_hyper(offset, tl);
7115         tl += 2;
7116         txdr_hyper(len, tl);
7117         tl += 2;
7118         txdr_hyper(minlen, tl);
7119         tl += 2;
7120         if (usecurstateid != 0) {
7121                 /* Special stateid for Current stateid. */
7122                 *tl++ = txdr_unsigned(1);
7123                 *tl++ = 0;
7124                 *tl++ = 0;
7125                 *tl++ = 0;
7126         } else {
7127                 *tl++ = txdr_unsigned(stateidp->seqid);
7128                 NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
7129                 *tl++ = stateidp->other[0];
7130                 *tl++ = stateidp->other[1];
7131                 *tl++ = stateidp->other[2];
7132         }
7133         *tl = txdr_unsigned(layoutlen);
7134 }
7135
7136 /*
7137  * Parse the reply for a successful LayoutGet operation.
7138  */
7139 static int
7140 nfsrv_parselayoutget(struct nfsmount *nmp, struct nfsrv_descript *nd,
7141     nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp)
7142 {
7143         uint32_t *tl;
7144         struct nfsclflayout *flp, *prevflp, *tflp;
7145         int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen;
7146         int m, mirrorcnt;
7147         uint64_t retlen, off;
7148         struct nfsfh *nfhp;
7149         uint8_t *cp;
7150         uid_t user;
7151         gid_t grp;
7152
7153         NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n");
7154         error = 0;
7155         flp = NULL;
7156         gotiomode = -1;
7157         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
7158         if (*tl++ != 0)
7159                 *retonclosep = 1;
7160         else
7161                 *retonclosep = 0;
7162         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
7163         NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
7164             (int)stateidp->seqid);
7165         stateidp->other[0] = *tl++;
7166         stateidp->other[1] = *tl++;
7167         stateidp->other[2] = *tl++;
7168         cnt = fxdr_unsigned(int, *tl);
7169         NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
7170         if (cnt <= 0 || cnt > 10000) {
7171                 /* Don't accept more than 10000 layouts in reply. */
7172                 error = NFSERR_BADXDR;
7173                 goto nfsmout;
7174         }
7175         for (i = 0; i < cnt; i++) {
7176                 /* Dissect to the layout type. */
7177                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER +
7178                     3 * NFSX_UNSIGNED);
7179                 off = fxdr_hyper(tl); tl += 2;
7180                 retlen = fxdr_hyper(tl); tl += 2;
7181                 iomode = fxdr_unsigned(int, *tl++);
7182                 laytype = fxdr_unsigned(int, *tl);
7183                 NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype,
7184                     (uintmax_t)off, (uintmax_t)retlen, iomode);
7185                 /* Ignore length of layout body for now. */
7186                 if (laytype == NFSLAYOUT_NFSV4_1_FILES) {
7187                         /* Parse the File layout up to fhcnt. */
7188                         NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED +
7189                             NFSX_HYPER + NFSX_V4DEVICEID);
7190                         fhcnt = fxdr_unsigned(int, *(tl + 4 +
7191                             NFSX_V4DEVICEID / NFSX_UNSIGNED));
7192                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7193                         if (fhcnt < 0 || fhcnt > 100) {
7194                                 /* Don't accept more than 100 file handles. */
7195                                 error = NFSERR_BADXDR;
7196                                 goto nfsmout;
7197                         }
7198                         if (fhcnt > 0)
7199                                 flp = malloc(sizeof(*flp) + fhcnt *
7200                                     sizeof(struct nfsfh *), M_NFSFLAYOUT,
7201                                     M_WAITOK);
7202                         else
7203                                 flp = malloc(sizeof(*flp), M_NFSFLAYOUT,
7204                                     M_WAITOK);
7205                         flp->nfsfl_flags = NFSFL_FILE;
7206                         flp->nfsfl_fhcnt = 0;
7207                         flp->nfsfl_devp = NULL;
7208                         flp->nfsfl_off = off;
7209                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7210                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7211                         else
7212                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
7213                         flp->nfsfl_iomode = iomode;
7214                         if (gotiomode == -1)
7215                                 gotiomode = flp->nfsfl_iomode;
7216                         /* Ignore layout body length for now. */
7217                         NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
7218                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7219                         flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
7220                         NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
7221                         mtx_lock(&nmp->nm_mtx);
7222                         if (nmp->nm_minorvers > 1 && (flp->nfsfl_util &
7223                             NFSFLAYUTIL_IOADVISE_THRU_MDS) != 0)
7224                                 nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7225                         mtx_unlock(&nmp->nm_mtx);
7226                         flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
7227                         flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
7228                         NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n",
7229                             flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff);
7230                         for (j = 0; j < fhcnt; j++) {
7231                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7232                                 nfhlen = fxdr_unsigned(int, *tl);
7233                                 if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
7234                                         error = NFSERR_BADXDR;
7235                                         goto nfsmout;
7236                                 }
7237                                 nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
7238                                     M_NFSFH, M_WAITOK);
7239                                 flp->nfsfl_fh[j] = nfhp;
7240                                 flp->nfsfl_fhcnt++;
7241                                 nfhp->nfh_len = nfhlen;
7242                                 NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
7243                                 NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
7244                         }
7245                 } else if (laytype == NFSLAYOUT_FLEXFILE) {
7246                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED +
7247                             NFSX_HYPER);
7248                         mirrorcnt = fxdr_unsigned(int, *(tl + 2));
7249                         NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt);
7250                         if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) {
7251                                 error = NFSERR_BADXDR;
7252                                 goto nfsmout;
7253                         }
7254                         flp = malloc(sizeof(*flp) + mirrorcnt *
7255                             sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK);
7256                         flp->nfsfl_flags = NFSFL_FLEXFILE;
7257                         flp->nfsfl_mirrorcnt = mirrorcnt;
7258                         for (j = 0; j < mirrorcnt; j++)
7259                                 flp->nfsfl_ffm[j].devp = NULL;
7260                         flp->nfsfl_off = off;
7261                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7262                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7263                         else
7264                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
7265                         flp->nfsfl_iomode = iomode;
7266                         if (gotiomode == -1)
7267                                 gotiomode = flp->nfsfl_iomode;
7268                         flp->nfsfl_stripeunit = fxdr_hyper(tl);
7269                         NFSCL_DEBUG(4, "stripeunit=%ju\n",
7270                             (uintmax_t)flp->nfsfl_stripeunit);
7271                         for (j = 0; j < mirrorcnt; j++) {
7272                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7273                                 k = fxdr_unsigned(int, *tl);
7274                                 if (k < 1 || k > 128) {
7275                                         error = NFSERR_BADXDR;
7276                                         goto nfsmout;
7277                                 }
7278                                 NFSCL_DEBUG(4, "servercnt=%d\n", k);
7279                                 for (l = 0; l < k; l++) {
7280                                         NFSM_DISSECT(tl, uint32_t *,
7281                                             NFSX_V4DEVICEID + NFSX_STATEID +
7282                                             2 * NFSX_UNSIGNED);
7283                                         if (l == 0) {
7284                                                 /* Just use the first server. */
7285                                                 NFSBCOPY(tl,
7286                                                     flp->nfsfl_ffm[j].dev,
7287                                                     NFSX_V4DEVICEID);
7288                                                 tl += (NFSX_V4DEVICEID /
7289                                                     NFSX_UNSIGNED);
7290                                                 tl++;
7291                                                 flp->nfsfl_ffm[j].st.seqid =
7292                                                     *tl++;
7293                                                 flp->nfsfl_ffm[j].st.other[0] =
7294                                                     *tl++;
7295                                                 flp->nfsfl_ffm[j].st.other[1] =
7296                                                     *tl++;
7297                                                 flp->nfsfl_ffm[j].st.other[2] =
7298                                                     *tl++;
7299                                                 NFSCL_DEBUG(4, "st.seqid=%u "
7300                                                  "st.o0=0x%x st.o1=0x%x "
7301                                                  "st.o2=0x%x\n",
7302                                                  flp->nfsfl_ffm[j].st.seqid,
7303                                                  flp->nfsfl_ffm[j].st.other[0],
7304                                                  flp->nfsfl_ffm[j].st.other[1],
7305                                                  flp->nfsfl_ffm[j].st.other[2]);
7306                                         } else
7307                                                 tl += ((NFSX_V4DEVICEID +
7308                                                     NFSX_STATEID +
7309                                                     NFSX_UNSIGNED) /
7310                                                     NFSX_UNSIGNED);
7311                                         fhcnt = fxdr_unsigned(int, *tl);
7312                                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7313                                         if (fhcnt < 1 ||
7314                                             fhcnt > NFSDEV_MAXVERS) {
7315                                                 error = NFSERR_BADXDR;
7316                                                 goto nfsmout;
7317                                         }
7318                                         for (m = 0; m < fhcnt; m++) {
7319                                                 NFSM_DISSECT(tl, uint32_t *,
7320                                                     NFSX_UNSIGNED);
7321                                                 nfhlen = fxdr_unsigned(int,
7322                                                     *tl);
7323                                                 NFSCL_DEBUG(4, "nfhlen=%d\n",
7324                                                     nfhlen);
7325                                                 if (nfhlen <= 0 || nfhlen >
7326                                                     NFSX_V4FHMAX) {
7327                                                         error = NFSERR_BADXDR;
7328                                                         goto nfsmout;
7329                                                 }
7330                                                 NFSM_DISSECT(cp, uint8_t *,
7331                                                     NFSM_RNDUP(nfhlen));
7332                                                 if (l == 0) {
7333                                                         flp->nfsfl_ffm[j].fhcnt 
7334                                                             = fhcnt;
7335                                                         nfhp = malloc(
7336                                                             sizeof(*nfhp) +
7337                                                             nfhlen - 1, M_NFSFH,
7338                                                             M_WAITOK);
7339                                                         flp->nfsfl_ffm[j].fh[m]
7340                                                             = nfhp;
7341                                                         nfhp->nfh_len = nfhlen;
7342                                                         NFSBCOPY(cp,
7343                                                             nfhp->nfh_fh,
7344                                                             nfhlen);
7345                                                         NFSCL_DEBUG(4,
7346                                                             "got fh\n");
7347                                                 }
7348                                         }
7349                                         /* Now, get the ffds_user/ffds_group. */
7350                                         error = nfsrv_parseug(nd, 0, &user,
7351                                             &grp, curthread);
7352                                         NFSCL_DEBUG(4, "after parseu=%d\n",
7353                                             error);
7354                                         if (error == 0)
7355                                                 error = nfsrv_parseug(nd, 1,
7356                                                     &user, &grp, curthread);
7357                                         NFSCL_DEBUG(4, "aft parseg=%d\n",
7358                                             grp);
7359                                         if (error != 0)
7360                                                 goto nfsmout;
7361                                         NFSCL_DEBUG(4, "user=%d group=%d\n",
7362                                             user, grp);
7363                                         if (l == 0) {
7364                                                 flp->nfsfl_ffm[j].user = user;
7365                                                 flp->nfsfl_ffm[j].group = grp;
7366                                                 NFSCL_DEBUG(4,
7367                                                     "usr=%d grp=%d\n", user,
7368                                                     grp);
7369                                         }
7370                                 }
7371                         }
7372                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7373                         flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++);
7374 #ifdef notnow
7375                         /*
7376                          * At this time, there is no flag.
7377                          * NFSFLEXFLAG_IOADVISE_THRU_MDS might need to be
7378                          * added, or it may never exist?
7379                          */
7380                         mtx_lock(&nmp->nm_mtx);
7381                         if (nmp->nm_minorvers > 1 && (flp->nfsfl_fflags &
7382                             NFSFLEXFLAG_IOADVISE_THRU_MDS) != 0)
7383                                 nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7384                         mtx_unlock(&nmp->nm_mtx);
7385 #endif
7386                         flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl);
7387                         NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n",
7388                             flp->nfsfl_fflags, flp->nfsfl_statshint);
7389                 } else {
7390                         error = NFSERR_BADXDR;
7391                         goto nfsmout;
7392                 }
7393                 if (flp->nfsfl_iomode == gotiomode) {
7394                         /* Keep the list in increasing offset order. */
7395                         tflp = LIST_FIRST(flhp);
7396                         prevflp = NULL;
7397                         while (tflp != NULL &&
7398                             tflp->nfsfl_off < flp->nfsfl_off) {
7399                                 prevflp = tflp;
7400                                 tflp = LIST_NEXT(tflp, nfsfl_list);
7401                         }
7402                         if (prevflp == NULL)
7403                                 LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
7404                         else
7405                                 LIST_INSERT_AFTER(prevflp, flp,
7406                                     nfsfl_list);
7407                         NFSCL_DEBUG(4, "flp inserted\n");
7408                 } else {
7409                         printf("nfscl_layoutget(): got wrong iomode\n");
7410                         nfscl_freeflayout(flp);
7411                 }
7412                 flp = NULL;
7413         }
7414 nfsmout:
7415         NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error);
7416         if (error != 0 && flp != NULL)
7417                 nfscl_freeflayout(flp);
7418         return (error);
7419 }
7420
7421 /*
7422  * Parse a user/group digit string.
7423  */
7424 static int
7425 nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
7426     NFSPROC_T *p)
7427 {
7428         uint32_t *tl;
7429         char *cp, *str, str0[NFSV4_SMALLSTR + 1];
7430         uint32_t len = 0;
7431         int error = 0;
7432
7433         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7434         len = fxdr_unsigned(uint32_t, *tl);
7435         str = NULL;
7436         if (len > NFSV4_OPAQUELIMIT) {
7437                 error = NFSERR_BADXDR;
7438                 goto nfsmout;
7439         }
7440         NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len);
7441         if (len == 0) {
7442                 if (dogrp != 0)
7443                         *gidp = GID_NOGROUP;
7444                 else
7445                         *uidp = UID_NOBODY;
7446                 return (0);
7447         }
7448         if (len > NFSV4_SMALLSTR)
7449                 str = malloc(len + 1, M_TEMP, M_WAITOK);
7450         else
7451                 str = str0;
7452         NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
7453         NFSBCOPY(cp, str, len);
7454         str[len] = '\0';
7455         NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
7456         if (dogrp != 0)
7457                 error = nfsv4_strtogid(nd, str, len, gidp);
7458         else
7459                 error = nfsv4_strtouid(nd, str, len, uidp);
7460 nfsmout:
7461         if (len > NFSV4_SMALLSTR)
7462                 free(str, M_TEMP);
7463         NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error);
7464         return (error);
7465 }
7466
7467 /*
7468  * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
7469  * so that it does both an Open and a Layoutget.
7470  */
7471 static int
7472 nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7473     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7474     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7475     struct ucred *cred, NFSPROC_T *p)
7476 {
7477         struct nfscllayout *lyp;
7478         struct nfsclflayout *flp;
7479         struct nfsclflayouthead flh;
7480         int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
7481         int layouttype, laystat;
7482         nfsv4stateid_t stateid;
7483         struct nfsclsession *tsep;
7484
7485         error = 0;
7486         if (NFSHASFLEXFILE(nmp))
7487                 layouttype = NFSLAYOUT_FLEXFILE;
7488         else
7489                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
7490         /*
7491          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
7492          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
7493          * flp == NULL.
7494          */
7495         lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp,
7496             &recalled);
7497         NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
7498         if (lyp == NULL)
7499                 islocked = 0;
7500         else if (flp != NULL)
7501                 islocked = 1;
7502         else
7503                 islocked = 2;
7504         if ((lyp == NULL || flp == NULL) && recalled == 0) {
7505                 LIST_INIT(&flh);
7506                 tsep = nfsmnt_mdssession(nmp);
7507                 layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
7508                     3 * NFSX_UNSIGNED);
7509                 if (lyp == NULL)
7510                         usecurstateid = 1;
7511                 else {
7512                         usecurstateid = 0;
7513                         stateid.seqid = lyp->nfsly_stateid.seqid;
7514                         stateid.other[0] = lyp->nfsly_stateid.other[0];
7515                         stateid.other[1] = lyp->nfsly_stateid.other[1];
7516                         stateid.other[2] = lyp->nfsly_stateid.other[2];
7517                 }
7518                 error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
7519                     newfhp, newfhlen, mode, op, name, namelen,
7520                     dpp, &stateid, usecurstateid, layouttype, layoutlen,
7521                     &retonclose, &flh, &laystat, cred, p);
7522                 NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
7523                     laystat, error);
7524                 laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
7525                     &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat,
7526                     &islocked, cred, p);
7527         } else
7528                 error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
7529                     mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
7530         if (islocked == 2)
7531                 nfscl_rellayout(lyp, 1);
7532         else if (islocked == 1)
7533                 nfscl_rellayout(lyp, 0);
7534         return (error);
7535 }
7536
7537 /*
7538  * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
7539  * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
7540  * handled by nfsrpc_openrpc().
7541  * For the case where op == NULL, dvp is the directory.  When op != NULL, it
7542  * can be NULL.
7543  */
7544 static int
7545 nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7546     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7547     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7548     nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
7549     int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
7550     int *laystatp, struct ucred *cred, NFSPROC_T *p)
7551 {
7552         uint32_t *tl;
7553         struct nfsrv_descript nfsd, *nd = &nfsd;
7554         struct nfscldeleg *ndp = NULL;
7555         struct nfsvattr nfsva;
7556         struct nfsclsession *tsep;
7557         uint32_t rflags, deleg;
7558         nfsattrbit_t attrbits;
7559         int error, ret, acesize, limitby, iomode;
7560
7561         *dpp = NULL;
7562         *laystatp = ENXIO;
7563         nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
7564             0, 0, false);
7565         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
7566         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
7567         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
7568         *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
7569         tsep = nfsmnt_mdssession(nmp);
7570         *tl++ = tsep->nfsess_clientid.lval[0];
7571         *tl = tsep->nfsess_clientid.lval[1];
7572         nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7573         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7574         *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
7575         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7576         nfsm_strtom(nd, name, namelen);
7577         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7578         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7579         NFSZERO_ATTRBIT(&attrbits);
7580         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
7581         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
7582         nfsrv_putattrbit(nd, &attrbits);
7583         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7584         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7585         if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
7586                 iomode = NFSLAYOUTIOMODE_RW;
7587         else
7588                 iomode = NFSLAYOUTIOMODE_READ;
7589         nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
7590             layouttype, layoutlen, usecurstateid);
7591         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
7592             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
7593         if (error != 0)
7594                 return (error);
7595         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
7596         if (nd->nd_repstat != 0)
7597                 *laystatp = nd->nd_repstat;
7598         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7599                 /* ND_NOMOREDATA will be set if the Open operation failed. */
7600                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7601                     6 * NFSX_UNSIGNED);
7602                 op->nfso_stateid.seqid = *tl++;
7603                 op->nfso_stateid.other[0] = *tl++;
7604                 op->nfso_stateid.other[1] = *tl++;
7605                 op->nfso_stateid.other[2] = *tl;
7606                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
7607                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7608                 if (error != 0)
7609                         goto nfsmout;
7610                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7611                 deleg = fxdr_unsigned(u_int32_t, *tl);
7612                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
7613                     deleg == NFSV4OPEN_DELEGATEWRITE) {
7614                         if (!(op->nfso_own->nfsow_clp->nfsc_flags &
7615                               NFSCLFLAGS_FIRSTDELEG))
7616                                 op->nfso_own->nfsow_clp->nfsc_flags |=
7617                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
7618                         ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
7619                             M_NFSCLDELEG, M_WAITOK);
7620                         LIST_INIT(&ndp->nfsdl_owner);
7621                         LIST_INIT(&ndp->nfsdl_lock);
7622                         ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
7623                         ndp->nfsdl_fhlen = newfhlen;
7624                         NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
7625                         newnfs_copyincred(cred, &ndp->nfsdl_cred);
7626                         nfscl_lockinit(&ndp->nfsdl_rwlock);
7627                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7628                             NFSX_UNSIGNED);
7629                         ndp->nfsdl_stateid.seqid = *tl++;
7630                         ndp->nfsdl_stateid.other[0] = *tl++;
7631                         ndp->nfsdl_stateid.other[1] = *tl++;
7632                         ndp->nfsdl_stateid.other[2] = *tl++;
7633                         ret = fxdr_unsigned(int, *tl);
7634                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
7635                                 ndp->nfsdl_flags = NFSCLDL_WRITE;
7636                                 /*
7637                                  * Indicates how much the file can grow.
7638                                  */
7639                                 NFSM_DISSECT(tl, u_int32_t *,
7640                                     3 * NFSX_UNSIGNED);
7641                                 limitby = fxdr_unsigned(int, *tl++);
7642                                 switch (limitby) {
7643                                 case NFSV4OPEN_LIMITSIZE:
7644                                         ndp->nfsdl_sizelimit = fxdr_hyper(tl);
7645                                         break;
7646                                 case NFSV4OPEN_LIMITBLOCKS:
7647                                         ndp->nfsdl_sizelimit =
7648                                             fxdr_unsigned(u_int64_t, *tl++);
7649                                         ndp->nfsdl_sizelimit *=
7650                                             fxdr_unsigned(u_int64_t, *tl);
7651                                         break;
7652                                 default:
7653                                         error = NFSERR_BADXDR;
7654                                         goto nfsmout;
7655                                 };
7656                         } else
7657                                 ndp->nfsdl_flags = NFSCLDL_READ;
7658                         if (ret != 0)
7659                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
7660                         error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
7661                             &acesize, p);
7662                         if (error != 0)
7663                                 goto nfsmout;
7664                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
7665                         error = NFSERR_BADXDR;
7666                         goto nfsmout;
7667                 }
7668                 if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
7669                     nfscl_assumeposixlocks)
7670                         op->nfso_posixlock = 1;
7671                 else
7672                         op->nfso_posixlock = 0;
7673                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7674                 /* If the 2nd element == NFS_OK, the Getattr succeeded. */
7675                 if (*++tl == 0) {
7676                         error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
7677                             NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
7678                             NULL, NULL, NULL, p, cred);
7679                         if (error != 0)
7680                                 goto nfsmout;
7681                         if (ndp != NULL) {
7682                                 ndp->nfsdl_change = nfsva.na_filerev;
7683                                 ndp->nfsdl_modtime = nfsva.na_mtime;
7684                                 ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7685                                 *dpp = ndp;
7686                                 ndp = NULL;
7687                         }
7688                         /*
7689                          * At this point, the Open has succeeded, so set
7690                          * nd_repstat = NFS_OK.  If the Layoutget failed,
7691                          * this function just won't return a layout.
7692                          */
7693                         if (nd->nd_repstat == 0) {
7694                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7695                                 *laystatp = fxdr_unsigned(int, *++tl);
7696                                 if (*laystatp == 0) {
7697                                         error = nfsrv_parselayoutget(nmp, nd,
7698                                             stateidp, retonclosep, flhp);
7699                                         if (error != 0)
7700                                                 *laystatp = error;
7701                                 }
7702                         } else
7703                                 nd->nd_repstat = 0;     /* Return 0 for Open. */
7704                 }
7705         }
7706         if (nd->nd_repstat != 0 && error == 0)
7707                 error = nd->nd_repstat;
7708 nfsmout:
7709         free(ndp, M_NFSCLDELEG);
7710         m_freem(nd->nd_mrep);
7711         return (error);
7712 }
7713
7714 /*
7715  * Similar to nfsrpc_createv4(), but also does the LayoutGet operation.
7716  * Used only for mounts with pNFS enabled.
7717  */
7718 static int
7719 nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7720     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7721     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7722     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7723     int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp,
7724     int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
7725     struct nfsclflayouthead *flhp, int *laystatp)
7726 {
7727         uint32_t *tl;
7728         int error = 0, deleg, newone, ret, acesize, limitby;
7729         struct nfsrv_descript nfsd, *nd = &nfsd;
7730         struct nfsclopen *op;
7731         struct nfscldeleg *dp = NULL;
7732         struct nfsnode *np;
7733         struct nfsfh *nfhp;
7734         struct nfsclsession *tsep;
7735         nfsattrbit_t attrbits;
7736         nfsv4stateid_t stateid;
7737         struct nfsmount *nmp;
7738
7739         nmp = VFSTONFS(dvp->v_mount);
7740         np = VTONFS(dvp);
7741         *laystatp = ENXIO;
7742         *unlockedp = 0;
7743         *nfhpp = NULL;
7744         *dpp = NULL;
7745         *attrflagp = 0;
7746         *dattrflagp = 0;
7747         if (namelen > NFS_MAXNAMLEN)
7748                 return (ENAMETOOLONG);
7749         NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp);
7750         /*
7751          * For V4, this is actually an Open op.
7752          */
7753         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
7754         *tl++ = txdr_unsigned(owp->nfsow_seqid);
7755         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
7756             NFSV4OPEN_ACCESSREAD);
7757         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
7758         tsep = nfsmnt_mdssession(nmp);
7759         *tl++ = tsep->nfsess_clientid.lval[0];
7760         *tl = tsep->nfsess_clientid.lval[1];
7761         nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7762         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7763         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
7764         if ((fmode & O_EXCL) != 0) {
7765                 if (NFSHASSESSPERSIST(nmp)) {
7766                         /* Use GUARDED for persistent sessions. */
7767                         *tl = txdr_unsigned(NFSCREATE_GUARDED);
7768                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
7769                 } else {
7770                         /* Otherwise, use EXCLUSIVE4_1. */
7771                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
7772                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
7773                         *tl++ = cverf.lval[0];
7774                         *tl = cverf.lval[1];
7775                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
7776                 }
7777         } else {
7778                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
7779                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
7780         }
7781         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7782         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7783         nfsm_strtom(nd, name, namelen);
7784         /* Get the new file's handle and attributes, plus save the FH. */
7785         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
7786         *tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
7787         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
7788         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7789         NFSGETATTR_ATTRBIT(&attrbits);
7790         nfsrv_putattrbit(nd, &attrbits);
7791         /* Get the directory's post-op attributes. */
7792         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7793         *tl = txdr_unsigned(NFSV4OP_PUTFH);
7794         nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
7795         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7796         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7797         nfsrv_putattrbit(nd, &attrbits);
7798         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7799         *tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
7800         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7801         nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
7802             layouttype, layoutlen, usecurstateid);
7803         error = nfscl_request(nd, dvp, p, cred, dstuff);
7804         if (error != 0)
7805                 return (error);
7806         NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
7807             error);
7808         if (nd->nd_repstat != 0)
7809                 *laystatp = nd->nd_repstat;
7810         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
7811         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7812                 NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
7813                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7814                     6 * NFSX_UNSIGNED);
7815                 stateid.seqid = *tl++;
7816                 stateid.other[0] = *tl++;
7817                 stateid.other[1] = *tl++;
7818                 stateid.other[2] = *tl;
7819                 nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7820                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7821                 deleg = fxdr_unsigned(int, *tl);
7822                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
7823                     deleg == NFSV4OPEN_DELEGATEWRITE) {
7824                         if (!(owp->nfsow_clp->nfsc_flags &
7825                               NFSCLFLAGS_FIRSTDELEG))
7826                                 owp->nfsow_clp->nfsc_flags |=
7827                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
7828                         dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
7829                             M_NFSCLDELEG, M_WAITOK);
7830                         LIST_INIT(&dp->nfsdl_owner);
7831                         LIST_INIT(&dp->nfsdl_lock);
7832                         dp->nfsdl_clp = owp->nfsow_clp;
7833                         newnfs_copyincred(cred, &dp->nfsdl_cred);
7834                         nfscl_lockinit(&dp->nfsdl_rwlock);
7835                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7836                             NFSX_UNSIGNED);
7837                         dp->nfsdl_stateid.seqid = *tl++;
7838                         dp->nfsdl_stateid.other[0] = *tl++;
7839                         dp->nfsdl_stateid.other[1] = *tl++;
7840                         dp->nfsdl_stateid.other[2] = *tl++;
7841                         ret = fxdr_unsigned(int, *tl);
7842                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
7843                                 dp->nfsdl_flags = NFSCLDL_WRITE;
7844                                 /*
7845                                  * Indicates how much the file can grow.
7846                                  */
7847                                 NFSM_DISSECT(tl, u_int32_t *,
7848                                     3 * NFSX_UNSIGNED);
7849                                 limitby = fxdr_unsigned(int, *tl++);
7850                                 switch (limitby) {
7851                                 case NFSV4OPEN_LIMITSIZE:
7852                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
7853                                         break;
7854                                 case NFSV4OPEN_LIMITBLOCKS:
7855                                         dp->nfsdl_sizelimit =
7856                                             fxdr_unsigned(u_int64_t, *tl++);
7857                                         dp->nfsdl_sizelimit *=
7858                                             fxdr_unsigned(u_int64_t, *tl);
7859                                         break;
7860                                 default:
7861                                         error = NFSERR_BADXDR;
7862                                         goto nfsmout;
7863                                 };
7864                         } else {
7865                                 dp->nfsdl_flags = NFSCLDL_READ;
7866                         }
7867                         if (ret != 0)
7868                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
7869                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
7870                             &acesize, p);
7871                         if (error != 0)
7872                                 goto nfsmout;
7873                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
7874                         error = NFSERR_BADXDR;
7875                         goto nfsmout;
7876                 }
7877
7878                 /* Now, we should have the status for the SaveFH. */
7879                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7880                 if (*++tl == 0) {
7881                         NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
7882                         /*
7883                          * Now, process the GetFH and Getattr for the newly
7884                          * created file. nfscl_mtofh() will set
7885                          * ND_NOMOREDATA if these weren't successful.
7886                          */
7887                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
7888                         NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
7889                         if (error != 0)
7890                                 goto nfsmout;
7891                 } else
7892                         nd->nd_flag |= ND_NOMOREDATA;
7893                 /* Now we have the PutFH and Getattr for the directory. */
7894                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7895                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7896                         if (*++tl != 0)
7897                                 nd->nd_flag |= ND_NOMOREDATA;
7898                         else {
7899                                 NFSM_DISSECT(tl, uint32_t *, 2 *
7900                                     NFSX_UNSIGNED);
7901                                 if (*++tl != 0)
7902                                         nd->nd_flag |= ND_NOMOREDATA;
7903                         }
7904                 }
7905                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7906                         /* Load the directory attributes. */
7907                         error = nfsm_loadattr(nd, dnap);
7908                         NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
7909                         if (error != 0)
7910                                 goto nfsmout;
7911                         *dattrflagp = 1;
7912                         if (dp != NULL && *attrflagp != 0) {
7913                                 dp->nfsdl_change = nnap->na_filerev;
7914                                 dp->nfsdl_modtime = nnap->na_mtime;
7915                                 dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7916                         }
7917                         /*
7918                          * We can now complete the Open state.
7919                          */
7920                         nfhp = *nfhpp;
7921                         if (dp != NULL) {
7922                                 dp->nfsdl_fhlen = nfhp->nfh_len;
7923                                 NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
7924                                     nfhp->nfh_len);
7925                         }
7926                         /*
7927                          * Get an Open structure that will be
7928                          * attached to the OpenOwner, acquired already.
7929                          */
7930                         error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
7931                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
7932                             cred, p, NULL, &op, &newone, NULL, 0);
7933                         if (error != 0)
7934                                 goto nfsmout;
7935                         op->nfso_stateid = stateid;
7936                         newnfs_copyincred(cred, &op->nfso_cred);
7937         
7938                         nfscl_openrelease(nmp, op, error, newone);
7939                         *unlockedp = 1;
7940
7941                         /* Now, handle the RestoreFH and LayoutGet. */
7942                         if (nd->nd_repstat == 0) {
7943                                 NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
7944                                 *laystatp = fxdr_unsigned(int, *(tl + 3));
7945                                 if (*laystatp == 0) {
7946                                         error = nfsrv_parselayoutget(nmp, nd,
7947                                             stateidp, retonclosep, flhp);
7948                                         if (error != 0)
7949                                                 *laystatp = error;
7950                                 }
7951                                 NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
7952                                     error);
7953                         } else
7954                                 nd->nd_repstat = 0;
7955                 }
7956         }
7957         if (nd->nd_repstat != 0 && error == 0)
7958                 error = nd->nd_repstat;
7959         if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION)
7960                 nfscl_initiate_recovery(owp->nfsow_clp);
7961 nfsmout:
7962         NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
7963         if (error == 0)
7964                 *dpp = dp;
7965         else
7966                 free(dp, M_NFSCLDELEG);
7967         m_freem(nd->nd_mrep);
7968         return (error);
7969 }
7970
7971 /*
7972  * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
7973  */
7974 static int
7975 nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7976     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7977     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7978     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7979     int *dattrflagp, void *dstuff, int *unlockedp)
7980 {
7981         struct nfscllayout *lyp;
7982         struct nfsclflayouthead flh;
7983         struct nfsfh *nfhp;
7984         struct nfsclsession *tsep;
7985         struct nfsmount *nmp;
7986         nfsv4stateid_t stateid;
7987         int error, layoutlen, layouttype, retonclose, laystat;
7988
7989         error = 0;
7990         nmp = VFSTONFS(dvp->v_mount);
7991         if (NFSHASFLEXFILE(nmp))
7992                 layouttype = NFSLAYOUT_FLEXFILE;
7993         else
7994                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
7995         LIST_INIT(&flh);
7996         tsep = nfsmnt_mdssession(nmp);
7997         layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
7998         error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
7999             owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
8000             dstuff, unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
8001             &flh, &laystat);
8002         NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
8003             laystat, error);
8004         lyp = NULL;
8005         if (laystat == 0) {
8006                 nfhp = *nfhpp;
8007                 laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
8008                     nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
8009                     layouttype, laystat, NULL, cred, p);
8010         } else
8011                 laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
8012                     retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL,
8013                     cred, p);
8014         if (laystat == 0)
8015                 nfscl_rellayout(lyp, 0);
8016         return (error);
8017 }
8018
8019 /*
8020  * Process the results of a layoutget() operation.
8021  */
8022 static int
8023 nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
8024     int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
8025     struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
8026     int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
8027 {
8028         struct nfsclflayout *tflp;
8029         struct nfscldevinfo *dip;
8030         uint8_t *dev;
8031         int i, mirrorcnt;
8032
8033         if (laystat == NFSERR_UNKNLAYOUTTYPE) {
8034                 NFSLOCKMNT(nmp);
8035                 if (!NFSHASFLEXFILE(nmp)) {
8036                         /* Switch to using Flex File Layout. */
8037                         nmp->nm_state |= NFSSTA_FLEXFILE;
8038                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
8039                         /* Disable pNFS. */
8040                         NFSCL_DEBUG(1, "disable PNFS\n");
8041                         nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
8042                 }
8043                 NFSUNLOCKMNT(nmp);
8044         }
8045         if (laystat == 0) {
8046                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
8047                 LIST_FOREACH(tflp, flhp, nfsfl_list) {
8048                         if (layouttype == NFSLAYOUT_FLEXFILE)
8049                                 mirrorcnt = tflp->nfsfl_mirrorcnt;
8050                         else
8051                                 mirrorcnt = 1;
8052                         for (i = 0; i < mirrorcnt; i++) {
8053                                 laystat = nfscl_adddevinfo(nmp, NULL, i, tflp);
8054                                 NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
8055                                 if (laystat != 0) {
8056                                         if (layouttype == NFSLAYOUT_FLEXFILE)
8057                                                 dev = tflp->nfsfl_ffm[i].dev;
8058                                         else
8059                                                 dev = tflp->nfsfl_dev;
8060                                         laystat = nfsrpc_getdeviceinfo(nmp, dev,
8061                                             layouttype, notifybit, &dip, cred,
8062                                             p);
8063                                         NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
8064                                             laystat);
8065                                         if (laystat != 0)
8066                                                 goto out;
8067                                         laystat = nfscl_adddevinfo(nmp, dip, i,
8068                                             tflp);
8069                                         if (laystat != 0)
8070                                                 printf("nfsrpc_layoutgetresout"
8071                                                     ": cannot add\n");
8072                                 }
8073                         }
8074                 }
8075         }
8076 out:
8077         if (laystat == 0) {
8078                 /*
8079                  * nfscl_layout() always returns with the nfsly_lock
8080                  * set to a refcnt (shared lock).
8081                  * Passing in dvp is sufficient, since it is only used to
8082                  * get the fsid for the file system.
8083                  */
8084                 laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
8085                     layouttype, retonclose, flhp, lypp, cred, p);
8086                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
8087                     laystat);
8088                 if (laystat == 0 && islockedp != NULL)
8089                         *islockedp = 1;
8090         }
8091         return (laystat);
8092 }
8093
8094 /*
8095  * nfs copy_file_range operation.
8096  */
8097 int
8098 nfsrpc_copy_file_range(vnode_t invp, off_t *inoffp, vnode_t outvp,
8099     off_t *outoffp, size_t *lenp, unsigned int flags, int *inattrflagp,
8100     struct nfsvattr *innap, int *outattrflagp, struct nfsvattr *outnap,
8101     struct ucred *cred, bool consecutive, bool *must_commitp)
8102 {
8103         int commit, error, expireret = 0, retrycnt;
8104         u_int32_t clidrev = 0;
8105         struct nfsmount *nmp = VFSTONFS(invp->v_mount);
8106         struct nfsfh *innfhp = NULL, *outnfhp = NULL;
8107         nfsv4stateid_t instateid, outstateid;
8108         void *inlckp, *outlckp;
8109
8110         if (nmp->nm_clp != NULL)
8111                 clidrev = nmp->nm_clp->nfsc_clientidrev;
8112         innfhp = VTONFS(invp)->n_fhp;
8113         outnfhp = VTONFS(outvp)->n_fhp;
8114         retrycnt = 0;
8115         do {
8116                 /* Get both stateids. */
8117                 inlckp = NULL;
8118                 nfscl_getstateid(invp, innfhp->nfh_fh, innfhp->nfh_len,
8119                     NFSV4OPEN_ACCESSREAD, 0, NULL, curthread, &instateid,
8120                     &inlckp);
8121                 outlckp = NULL;
8122                 nfscl_getstateid(outvp, outnfhp->nfh_fh, outnfhp->nfh_len,
8123                     NFSV4OPEN_ACCESSWRITE, 0, NULL, curthread, &outstateid,
8124                     &outlckp);
8125
8126                 error = nfsrpc_copyrpc(invp, *inoffp, outvp, *outoffp, lenp,
8127                     &instateid, &outstateid, innap, inattrflagp, outnap,
8128                     outattrflagp, consecutive, &commit, cred, curthread);
8129                 if (error == 0) {
8130                         if (commit != NFSWRITE_FILESYNC)
8131                                 *must_commitp = true;
8132                         *inoffp += *lenp;
8133                         *outoffp += *lenp;
8134                 } else if (error == NFSERR_STALESTATEID)
8135                         nfscl_initiate_recovery(nmp->nm_clp);
8136                 if (inlckp != NULL)
8137                         nfscl_lockderef(inlckp);
8138                 if (outlckp != NULL)
8139                         nfscl_lockderef(outlckp);
8140                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8141                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8142                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8143                         (void) nfs_catnap(PZERO, error, "nfs_cfr");
8144                 } else if ((error == NFSERR_EXPIRED ||
8145                     error == NFSERR_BADSTATEID) && clidrev != 0) {
8146                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8147                             curthread);
8148                 }
8149                 retrycnt++;
8150         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
8151             error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8152               error == NFSERR_STALEDONTRECOVER ||
8153             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8154             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8155              expireret == 0 && clidrev != 0 && retrycnt < 4));
8156         if (error != 0 && (retrycnt >= 4 ||
8157             error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8158               error == NFSERR_STALEDONTRECOVER))
8159                 error = EIO;
8160         return (error);
8161 }
8162
8163 /*
8164  * The copy RPC.
8165  */
8166 static int
8167 nfsrpc_copyrpc(vnode_t invp, off_t inoff, vnode_t outvp, off_t outoff,
8168     size_t *lenp, nfsv4stateid_t *instateidp, nfsv4stateid_t *outstateidp,
8169     struct nfsvattr *innap, int *inattrflagp, struct nfsvattr *outnap,
8170     int *outattrflagp, bool consecutive, int *commitp, struct ucred *cred,
8171     NFSPROC_T *p)
8172 {
8173         uint32_t *tl;
8174         int error;
8175         struct nfsrv_descript nfsd;
8176         struct nfsrv_descript *nd = &nfsd;
8177         struct nfsmount *nmp;
8178         nfsattrbit_t attrbits;
8179         uint64_t len;
8180
8181         nmp = VFSTONFS(outvp->v_mount);
8182         *inattrflagp = *outattrflagp = 0;
8183         *commitp = NFSWRITE_UNSTABLE;
8184         len = *lenp;
8185         *lenp = 0;
8186         if (len > nfs_maxcopyrange)
8187                 len = nfs_maxcopyrange;
8188         NFSCL_REQSTART(nd, NFSPROC_COPY, invp);
8189         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8190         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8191         NFSGETATTR_ATTRBIT(&attrbits);
8192         nfsrv_putattrbit(nd, &attrbits);
8193         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8194         *tl = txdr_unsigned(NFSV4OP_PUTFH);
8195         nfsm_fhtom(nd, VTONFS(outvp)->n_fhp->nfh_fh,
8196             VTONFS(outvp)->n_fhp->nfh_len, 0);
8197         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8198         *tl = txdr_unsigned(NFSV4OP_COPY);
8199         nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID);
8200         nfsm_stateidtom(nd, outstateidp, NFSSTATEID_PUTSTATEID);
8201         NFSM_BUILD(tl, uint32_t *, 3 * NFSX_HYPER + 4 * NFSX_UNSIGNED);
8202         txdr_hyper(inoff, tl); tl += 2;
8203         txdr_hyper(outoff, tl); tl += 2;
8204         txdr_hyper(len, tl); tl += 2;
8205         if (consecutive)
8206                 *tl++ = newnfs_true;
8207         else
8208                 *tl++ = newnfs_false;
8209         *tl++ = newnfs_true;
8210         *tl++ = 0;
8211         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8212         NFSWRITEGETATTR_ATTRBIT(&attrbits);
8213         nfsrv_putattrbit(nd, &attrbits);
8214         error = nfscl_request(nd, invp, p, cred, NULL);
8215         if (error != 0)
8216                 return (error);
8217         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8218                 /* Get the input file's attributes. */
8219                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8220                 if (*(tl + 1) == 0) {
8221                         error = nfsm_loadattr(nd, innap);
8222                         if (error != 0)
8223                                 goto nfsmout;
8224                         *inattrflagp = 1;
8225                 } else
8226                         nd->nd_flag |= ND_NOMOREDATA;
8227         }
8228         /* Skip over return stat for PutFH. */
8229         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8230                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8231                 if (*++tl != 0)
8232                         nd->nd_flag |= ND_NOMOREDATA;
8233         }
8234         /* Skip over return stat for Copy. */
8235         if ((nd->nd_flag & ND_NOMOREDATA) == 0)
8236                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8237         if (nd->nd_repstat == 0) {
8238                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8239                 if (*tl != 0) {
8240                         /* There should be no callback ids. */
8241                         error = NFSERR_BADXDR;
8242                         goto nfsmout;
8243                 }
8244                 NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED +
8245                     NFSX_VERF);
8246                 len = fxdr_hyper(tl); tl += 2;
8247                 *commitp = fxdr_unsigned(int, *tl++);
8248                 NFSLOCKMNT(nmp);
8249                 if (!NFSHASWRITEVERF(nmp)) {
8250                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8251                         NFSSETWRITEVERF(nmp);
8252                 } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
8253                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8254                         nd->nd_repstat = NFSERR_STALEWRITEVERF;
8255                 }
8256                 NFSUNLOCKMNT(nmp);
8257                 tl += (NFSX_VERF / NFSX_UNSIGNED);
8258                 if (nd->nd_repstat == 0 && *++tl != newnfs_true)
8259                         /* Must be a synchronous copy. */
8260                         nd->nd_repstat = NFSERR_NOTSUPP;
8261                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8262                 error = nfsm_loadattr(nd, outnap);
8263                 if (error == 0)
8264                         *outattrflagp = NFS_LATTR_NOSHRINK;
8265                 if (nd->nd_repstat == 0)
8266                         *lenp = len;
8267         } else if (nd->nd_repstat == NFSERR_OFFLOADNOREQS) {
8268                 /*
8269                  * For the case where consecutive is not supported, but
8270                  * synchronous is supported, we can try consecutive == false
8271                  * by returning this error.  Otherwise, return NFSERR_NOTSUPP,
8272                  * since Copy cannot be done.
8273                  */
8274                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8275                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8276                         if (!consecutive || *++tl == newnfs_false)
8277                                 nd->nd_repstat = NFSERR_NOTSUPP;
8278                 } else
8279                         nd->nd_repstat = NFSERR_BADXDR;
8280         }
8281         if (error == 0)
8282                 error = nd->nd_repstat;
8283 nfsmout:
8284         m_freem(nd->nd_mrep);
8285         return (error);
8286 }
8287
8288 /*
8289  * Seek operation.
8290  */
8291 int
8292 nfsrpc_seek(vnode_t vp, off_t *offp, bool *eofp, int content,
8293     struct ucred *cred, struct nfsvattr *nap, int *attrflagp)
8294 {
8295         int error, expireret = 0, retrycnt;
8296         u_int32_t clidrev = 0;
8297         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
8298         struct nfsnode *np = VTONFS(vp);
8299         struct nfsfh *nfhp = NULL;
8300         nfsv4stateid_t stateid;
8301         void *lckp;
8302
8303         if (nmp->nm_clp != NULL)
8304                 clidrev = nmp->nm_clp->nfsc_clientidrev;
8305         nfhp = np->n_fhp;
8306         retrycnt = 0;
8307         do {
8308                 lckp = NULL;
8309                 nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
8310                     NFSV4OPEN_ACCESSREAD, 0, cred, curthread, &stateid, &lckp);
8311                 error = nfsrpc_seekrpc(vp, offp, &stateid, eofp, content,
8312                     nap, attrflagp, cred);
8313                 if (error == NFSERR_STALESTATEID)
8314                         nfscl_initiate_recovery(nmp->nm_clp);
8315                 if (lckp != NULL)
8316                         nfscl_lockderef(lckp);
8317                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8318                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8319                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8320                         (void) nfs_catnap(PZERO, error, "nfs_seek");
8321                 } else if ((error == NFSERR_EXPIRED ||
8322                     error == NFSERR_BADSTATEID) && clidrev != 0) {
8323                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8324                             curthread);
8325                 }
8326                 retrycnt++;
8327         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8328             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8329             error == NFSERR_BADSESSION ||
8330             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8331             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8332              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
8333             (error == NFSERR_OPENMODE && retrycnt < 4));
8334         if (error && retrycnt >= 4)
8335                 error = EIO;
8336         return (error);
8337 }
8338
8339 /*
8340  * The seek RPC.
8341  */
8342 static int
8343 nfsrpc_seekrpc(vnode_t vp, off_t *offp, nfsv4stateid_t *stateidp, bool *eofp,
8344     int content, struct nfsvattr *nap, int *attrflagp, struct ucred *cred)
8345 {
8346         uint32_t *tl;
8347         int error;
8348         struct nfsrv_descript nfsd;
8349         struct nfsrv_descript *nd = &nfsd;
8350         nfsattrbit_t attrbits;
8351
8352         *attrflagp = 0;
8353         NFSCL_REQSTART(nd, NFSPROC_SEEK, vp);
8354         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
8355         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8356         txdr_hyper(*offp, tl); tl += 2;
8357         *tl++ = txdr_unsigned(content);
8358         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8359         NFSGETATTR_ATTRBIT(&attrbits);
8360         nfsrv_putattrbit(nd, &attrbits);
8361         error = nfscl_request(nd, vp, curthread, cred, NULL);
8362         if (error != 0)
8363                 return (error);
8364         if (nd->nd_repstat == 0) {
8365                 NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER);
8366                 if (*tl++ == newnfs_true)
8367                         *eofp = true;
8368                 else
8369                         *eofp = false;
8370                 *offp = fxdr_hyper(tl);
8371                 /* Just skip over Getattr op status. */
8372                 error = nfsm_loadattr(nd, nap);
8373                 if (error == 0)
8374                         *attrflagp = 1;
8375         }
8376         error = nd->nd_repstat;
8377 nfsmout:
8378         m_freem(nd->nd_mrep);
8379         return (error);
8380 }
8381
8382 /*
8383  * The getextattr RPC.
8384  */
8385 int
8386 nfsrpc_getextattr(vnode_t vp, const char *name, struct uio *uiop, ssize_t *lenp,
8387     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8388 {
8389         uint32_t *tl;
8390         int error;
8391         struct nfsrv_descript nfsd;
8392         struct nfsrv_descript *nd = &nfsd;
8393         nfsattrbit_t attrbits;
8394         uint32_t len, len2;
8395
8396         *attrflagp = 0;
8397         NFSCL_REQSTART(nd, NFSPROC_GETEXTATTR, vp);
8398         nfsm_strtom(nd, name, strlen(name));
8399         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8400         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8401         NFSGETATTR_ATTRBIT(&attrbits);
8402         nfsrv_putattrbit(nd, &attrbits);
8403         error = nfscl_request(nd, vp, p, cred, NULL);
8404         if (error != 0)
8405                 return (error);
8406         if (nd->nd_repstat == 0) {
8407                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8408                 len = fxdr_unsigned(uint32_t, *tl);
8409                 /* Sanity check lengths. */
8410                 if (uiop != NULL && len > 0 && len <= IOSIZE_MAX &&
8411                     uiop->uio_resid <= UINT32_MAX) {
8412                         len2 = uiop->uio_resid;
8413                         if (len2 >= len)
8414                                 error = nfsm_mbufuio(nd, uiop, len);
8415                         else {
8416                                 error = nfsm_mbufuio(nd, uiop, len2);
8417                                 if (error == 0) {
8418                                         /*
8419                                          * nfsm_mbufuio() advances to a multiple
8420                                          * of 4, so round up len2 as well.  Then
8421                                          * we need to advance over the rest of
8422                                          * the data, rounding up the remaining
8423                                          * length.
8424                                          */
8425                                         len2 = NFSM_RNDUP(len2);
8426                                         len2 = NFSM_RNDUP(len - len2);
8427                                         if (len2 > 0)
8428                                                 error = nfsm_advance(nd, len2,
8429                                                     -1);
8430                                 }
8431                         }
8432                 } else if (uiop == NULL && len > 0) {
8433                         /* Just wants the length and not the data. */
8434                         error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8435                 } else if (len > 0)
8436                         error = ENOATTR;
8437                 if (error != 0)
8438                         goto nfsmout;
8439                 *lenp = len;
8440                 /* Just skip over Getattr op status. */
8441                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8442                 error = nfsm_loadattr(nd, nap);
8443                 if (error == 0)
8444                         *attrflagp = 1;
8445         }
8446         if (error == 0)
8447                 error = nd->nd_repstat;
8448 nfsmout:
8449         m_freem(nd->nd_mrep);
8450         return (error);
8451 }
8452
8453 /*
8454  * The setextattr RPC.
8455  */
8456 int
8457 nfsrpc_setextattr(vnode_t vp, const char *name, struct uio *uiop,
8458     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8459 {
8460         uint32_t *tl;
8461         int error;
8462         struct nfsrv_descript nfsd;
8463         struct nfsrv_descript *nd = &nfsd;
8464         nfsattrbit_t attrbits;
8465
8466         *attrflagp = 0;
8467         NFSCL_REQSTART(nd, NFSPROC_SETEXTATTR, vp);
8468         if (uiop->uio_resid > nd->nd_maxreq) {
8469                 /* nd_maxreq is set by NFSCL_REQSTART(). */
8470                 m_freem(nd->nd_mreq);
8471                 return (EINVAL);
8472         }
8473         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8474         *tl = txdr_unsigned(NFSV4SXATTR_EITHER);
8475         nfsm_strtom(nd, name, strlen(name));
8476         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8477         *tl = txdr_unsigned(uiop->uio_resid);
8478         nfsm_uiombuf(nd, uiop, uiop->uio_resid);
8479         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8480         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8481         NFSGETATTR_ATTRBIT(&attrbits);
8482         nfsrv_putattrbit(nd, &attrbits);
8483         error = nfscl_request(nd, vp, p, cred, NULL);
8484         if (error != 0)
8485                 return (error);
8486         if (nd->nd_repstat == 0) {
8487                 /* Just skip over the reply and Getattr op status. */
8488                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8489                     NFSX_UNSIGNED);
8490                 error = nfsm_loadattr(nd, nap);
8491                 if (error == 0)
8492                         *attrflagp = 1;
8493         }
8494         if (error == 0)
8495                 error = nd->nd_repstat;
8496 nfsmout:
8497         m_freem(nd->nd_mrep);
8498         return (error);
8499 }
8500
8501 /*
8502  * The removeextattr RPC.
8503  */
8504 int
8505 nfsrpc_rmextattr(vnode_t vp, const char *name, struct nfsvattr *nap,
8506     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8507 {
8508         uint32_t *tl;
8509         int error;
8510         struct nfsrv_descript nfsd;
8511         struct nfsrv_descript *nd = &nfsd;
8512         nfsattrbit_t attrbits;
8513
8514         *attrflagp = 0;
8515         NFSCL_REQSTART(nd, NFSPROC_RMEXTATTR, vp);
8516         nfsm_strtom(nd, name, strlen(name));
8517         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8518         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8519         NFSGETATTR_ATTRBIT(&attrbits);
8520         nfsrv_putattrbit(nd, &attrbits);
8521         error = nfscl_request(nd, vp, p, cred, NULL);
8522         if (error != 0)
8523                 return (error);
8524         if (nd->nd_repstat == 0) {
8525                 /* Just skip over the reply and Getattr op status. */
8526                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8527                     NFSX_UNSIGNED);
8528                 error = nfsm_loadattr(nd, nap);
8529                 if (error == 0)
8530                         *attrflagp = 1;
8531         }
8532         if (error == 0)
8533                 error = nd->nd_repstat;
8534 nfsmout:
8535         m_freem(nd->nd_mrep);
8536         return (error);
8537 }
8538
8539 /*
8540  * The listextattr RPC.
8541  */
8542 int
8543 nfsrpc_listextattr(vnode_t vp, uint64_t *cookiep, struct uio *uiop,
8544     size_t *lenp, bool *eofp, struct nfsvattr *nap, int *attrflagp,
8545     struct ucred *cred, NFSPROC_T *p)
8546 {
8547         uint32_t *tl;
8548         int cnt, error, i, len;
8549         struct nfsrv_descript nfsd;
8550         struct nfsrv_descript *nd = &nfsd;
8551         nfsattrbit_t attrbits;
8552         u_char c;
8553
8554         *attrflagp = 0;
8555         NFSCL_REQSTART(nd, NFSPROC_LISTEXTATTR, vp);
8556         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8557         txdr_hyper(*cookiep, tl); tl += 2;
8558         *tl++ = txdr_unsigned(*lenp);
8559         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8560         NFSGETATTR_ATTRBIT(&attrbits);
8561         nfsrv_putattrbit(nd, &attrbits);
8562         error = nfscl_request(nd, vp, p, cred, NULL);
8563         if (error != 0)
8564                 return (error);
8565         *eofp = true;
8566         *lenp = 0;
8567         if (nd->nd_repstat == 0) {
8568                 NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
8569                 *cookiep = fxdr_hyper(tl); tl += 2;
8570                 cnt = fxdr_unsigned(int, *tl);
8571                 if (cnt < 0) {
8572                         error = EBADRPC;
8573                         goto nfsmout;
8574                 }
8575                 for (i = 0; i < cnt; i++) {
8576                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8577                         len = fxdr_unsigned(int, *tl);
8578                         if (len <= 0 || len > EXTATTR_MAXNAMELEN) {
8579                                 error = EBADRPC;
8580                                 goto nfsmout;
8581                         }
8582                         if (uiop == NULL)
8583                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8584                         else if (uiop->uio_resid >= len + 1) {
8585                                 c = len;
8586                                 error = uiomove(&c, sizeof(c), uiop);
8587                                 if (error == 0)
8588                                         error = nfsm_mbufuio(nd, uiop, len);
8589                         } else {
8590                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8591                                 *eofp = false;
8592                         }
8593                         if (error != 0)
8594                                 goto nfsmout;
8595                         *lenp += (len + 1);
8596                 }
8597                 /* Get the eof and skip over the Getattr op status. */
8598                 NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
8599                 /*
8600                  * *eofp is set false above, because it wasn't able to copy
8601                  * all of the reply.
8602                  */
8603                 if (*eofp && *tl == 0)
8604                         *eofp = false;
8605                 error = nfsm_loadattr(nd, nap);
8606                 if (error == 0)
8607                         *attrflagp = 1;
8608         }
8609         if (error == 0)
8610                 error = nd->nd_repstat;
8611 nfsmout:
8612         m_freem(nd->nd_mrep);
8613         return (error);
8614 }
8615
8616 /*
8617  * Split an mbuf list.  For non-M_EXTPG mbufs, just use m_split().
8618  */
8619 static struct mbuf *
8620 nfsm_split(struct mbuf *mp, uint64_t xfer)
8621 {
8622         struct mbuf *m, *m2;
8623         vm_page_t pg;
8624         int i, j, left, pgno, plen, trim;
8625         char *cp, *cp2;
8626
8627         if ((mp->m_flags & M_EXTPG) == 0) {
8628                 m = m_split(mp, xfer, M_WAITOK);
8629                 return (m);
8630         }
8631
8632         /* Find the correct mbuf to split at. */
8633         for (m = mp; m != NULL && xfer > m->m_len; m = m->m_next)
8634                 xfer -= m->m_len;
8635         if (m == NULL)
8636                 return (NULL);
8637
8638         /* If xfer == m->m_len, we can just split the mbuf list. */
8639         if (xfer == m->m_len) {
8640                 m2 = m->m_next;
8641                 m->m_next = NULL;
8642                 return (m2);
8643         }
8644
8645         /* Find the page to split at. */
8646         pgno = 0;
8647         left = xfer;
8648         do {
8649                 if (pgno == 0)
8650                         plen = m_epg_pagelen(m, 0, m->m_epg_1st_off);
8651                 else
8652                         plen = m_epg_pagelen(m, pgno, 0);
8653                 if (left <= plen)
8654                         break;
8655                 left -= plen;
8656                 pgno++;
8657         } while (pgno < m->m_epg_npgs);
8658         if (pgno == m->m_epg_npgs)
8659                 panic("nfsm_split: eroneous ext_pgs mbuf");
8660
8661         m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs);
8662         m2->m_epg_flags |= EPG_FLAG_ANON;
8663
8664         /*
8665          * If left < plen, allocate a new page for the new mbuf
8666          * and copy the data after left in the page to this new
8667          * page.
8668          */
8669         if (left < plen) {
8670                 do {
8671                         pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
8672                             VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP |
8673                             VM_ALLOC_WIRED);
8674                         if (pg == NULL)
8675                                 vm_wait(NULL);
8676                 } while (pg == NULL);
8677                 m2->m_epg_pa[0] = VM_PAGE_TO_PHYS(pg);
8678                 m2->m_epg_npgs = 1;
8679
8680                 /* Copy the data after left to the new page. */
8681                 trim = plen - left;
8682                 cp = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
8683                 if (pgno == 0)
8684                         cp += m->m_epg_1st_off;
8685                 cp += left;
8686                 cp2 = (char *)(void *)PHYS_TO_DMAP(m2->m_epg_pa[0]);
8687                 if (pgno == m->m_epg_npgs - 1)
8688                         m2->m_epg_last_len = trim;
8689                 else {
8690                         cp2 += PAGE_SIZE - trim;
8691                         m2->m_epg_1st_off = PAGE_SIZE - trim;
8692                         m2->m_epg_last_len = m->m_epg_last_len;
8693                 }
8694                 memcpy(cp2, cp, trim);
8695                 m2->m_len = trim;
8696         } else {
8697                 m2->m_len = 0;
8698                 m2->m_epg_last_len = m->m_epg_last_len;
8699         }
8700
8701         /* Move the pages beyond pgno to the new mbuf. */
8702         for (i = pgno + 1, j = m2->m_epg_npgs; i < m->m_epg_npgs; i++, j++) {
8703                 m2->m_epg_pa[j] = m->m_epg_pa[i];
8704                 /* Never moves page 0. */
8705                 m2->m_len += m_epg_pagelen(m, i, 0);
8706         }
8707         m2->m_epg_npgs = j;
8708         m->m_epg_npgs = pgno + 1;
8709         m->m_epg_last_len = left;
8710         m->m_len = xfer;
8711
8712         m2->m_next = m->m_next;
8713         m->m_next = NULL;
8714         return (m2);
8715 }