]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/nfsclient/nfs_clrpcops.c
nfsclient: fix panic in cache_enter_time()
[FreeBSD/FreeBSD.git] / sys / fs / nfsclient / nfs_clrpcops.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 /*
40  * Rpc op calls, generally called from the vnode op calls or through the
41  * buffer cache, for NFS v2, 3 and 4.
42  * These do not normally make any changes to vnode arguments or use
43  * structures that might change between the VFS variants. The returned
44  * arguments are all at the end, after the NFSPROC_T *p one.
45  */
46
47 #include "opt_inet6.h"
48
49 #include <fs/nfs/nfsport.h>
50 #include <fs/nfsclient/nfs.h>
51 #include <sys/extattr.h>
52 #include <sys/sysctl.h>
53 #include <sys/taskqueue.h>
54
55 SYSCTL_DECL(_vfs_nfs);
56
   /*
    * Read/write integer sysctl "ignore_eexist" under _vfs_nfs, backed by
    * nfsignore_eexist (initially 0).
    */
57 static int      nfsignore_eexist = 0;
58 SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
59     &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
60
   /*
    * Read/write integer sysctl "dssameconn" under _vfs_nfs, backed by
    * nfscl_dssameconn (initially 0).
    */
61 static int      nfscl_dssameconn = 0;
62 SYSCTL_INT(_vfs_nfs, OID_AUTO, dssameconn, CTLFLAG_RW,
63     &nfscl_dssameconn, 0, "Use same TCP connection to multiple DSs");
64
65 /*
66  * Global variables
67  */
   /* The extern declarations refer to objects defined outside this file. */
68 extern int nfs_numnfscbd;
69 extern struct timeval nfsboottime;
70 extern u_int32_t newnfs_false, newnfs_true;
71 extern nfstype nfsv34_type[9];
72 extern int nfsrv_useacl;
73 extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
74 extern int nfscl_debuglevel;
75 extern int nfs_pnfsiothreads;
76 extern u_long sb_max_adj;
77 extern int nfs_maxcopyrange;
78 NFSCLSTATEMUTEX;
   /* The following are defined here, with the initial values shown. */
79 int nfstest_outofseq = 0;
80 int nfscl_assumeposixlocks = 1;
81 int nfscl_enablecallb = 0;
82 short nfsv4_cbport = NFSV4_CBPORT;
83 int nfstest_openallsetattr = 0;
84
   /* Byte offset of the d_name member within struct dirent. */
85 #define DIRHDSIZ        offsetof(struct dirent, d_name)
86
87 /*
88  * nfscl_getsameserver() can return one of three values:
89  * NFSDSP_USETHISSESSION - Use this session for the DS.
90  * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new
91  *     session.
92  * NFSDSP_NOTFOUND - No matching server was found.
93  */
94 enum nfsclds_state {
95         NFSDSP_USETHISSESSION = 0,      /* use this session for the DS */
96         NFSDSP_SEQTHISSESSION = 1,      /* new session, using this dsp's nfsclds_sequence */
97         NFSDSP_NOTFOUND = 2,            /* no matching server was found */
98 };
99
100 /*
101  * Do a write RPC on a DS data file, using this structure for the arguments,
102  * so that this function can be executed by a separate kernel process.
103  */
104 struct nfsclwritedsdorpc {
            /*
             * NOTE(review): done/inprog look like completion and in-progress
             * flags for the queued work; their exact protocol is not visible
             * here - confirm against the code that uses this structure.
             */
105         int                     done;
106         int                     inprog;
            /* Task object (see <sys/taskqueue.h>, included above). */
107         struct task             tsk;
            /*
             * NOTE(review): the members from vp through p appear to be the
             * saved arguments for the DS write RPC (compare the
             * nfsrpc_writedsmir() prototype) - confirm.
             */
108         struct vnode            *vp;
109         int                     iomode;
110         int                     must_commit;
111         nfsv4stateid_t          *stateidp;
112         struct nfsclds          *dsp;
113         uint64_t                off;
114         int                     len;
            /* Only present when "notyet" is defined. */
115 #ifdef notyet
116         int                     advise;
117 #endif
118         struct nfsfh            *fhp;
119         struct mbuf             *m;
120         int                     vers;
121         int                     minorvers;
122         struct ucred            *cred;
123         NFSPROC_T               *p;
            /* NOTE(review): presumably the result of the RPC - confirm. */
124         int                     err;
125 };
126
    /*
     * Prototypes for the helpers that are private to this file (all declared
     * static).  The nfsio_adviseds()/nfsrpc_adviseds() pair is only declared
     * when "notyet" is defined.  The last prototype, nfs_pnfsio(), is the one
     * non-static declaration in this list.
     */
127 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
128     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
129 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
130     nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *);
131 static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
132     struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
133     void *);
134 static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
135     nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
136     struct nfsvattr *, struct nfsfh **, int *, int *, void *);
137 static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
138     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
139     NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
140     int *, void *, int *);
141 static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
142     struct nfscllockowner *, u_int64_t, u_int64_t,
143     u_int32_t, struct ucred *, NFSPROC_T *, int);
144 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
145     struct acl *, nfsv4stateid_t *, void *);
146 static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
147     uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
148     struct ucred *, NFSPROC_T *);
149 static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
150     struct sockaddr_in6 *, sa_family_t, int, int, struct nfsclds **,
151     NFSPROC_T *);
152 static void nfscl_initsessionslots(struct nfsclsession *);
153 static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
154     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
155     struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
156     NFSPROC_T *);
157 static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
158     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
159     struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
160     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
161 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
162     struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
163     struct ucred *, NFSPROC_T *);
164 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
165     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
166     struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
167 static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
168     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
169     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
170 static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
171     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
172     struct ucred *, NFSPROC_T *);
173 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
174     struct nfsclds *, struct nfsclds **, uint32_t *);
175 static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
176     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
177     NFSPROC_T *);
178 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
179     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
180 #ifdef notyet
181 static int nfsio_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
182     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
183     NFSPROC_T *);
184 static int nfsrpc_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
185     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
186 #endif
187 static int nfsrpc_allocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
188     struct nfsvattr *, int *, struct ucred *, NFSPROC_T *, void *);
189 static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
190     uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
191 static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
192     NFSPROC_T *);
193 static int nfsrv_parselayoutget(struct nfsmount *, struct nfsrv_descript *,
194     nfsv4stateid_t *, int *, struct nfsclflayouthead *);
195 static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
196     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
197     struct nfscldeleg **, struct ucred *, NFSPROC_T *);
198 static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
199     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
200     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
201     struct nfsfh **, int *, int *, void *, int *);
202 static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
203     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
204     struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
205     struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
206 static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
207     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
208     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
209     struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *,
210     int, int, int, int *, struct nfsclflayouthead *, int *);
211 static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
212     uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
213     struct nfsclflayouthead *, struct ucred *, NFSPROC_T *, void *);
214 static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
215     int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
216     struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
217 static int nfsrpc_copyrpc(vnode_t, off_t, vnode_t, off_t, size_t *,
218     nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *,
219     struct nfsvattr *, int *, bool, int *, struct ucred *, NFSPROC_T *);
220 static int nfsrpc_seekrpc(vnode_t, off_t *, nfsv4stateid_t *, bool *,
221     int, struct nfsvattr *, int *, struct ucred *);
222 static struct mbuf *nfsm_split(struct mbuf *, uint64_t);
223
224 int nfs_pnfsio(task_fn_t *, void *);
225
226 /*
227  * Send an NFSPROC_NULL request for vp; returns 0 or the request/reply error.
228  */
229 int
230 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
231 {
232         struct nfsrv_descript nfsd, *nd = &nfsd;
233         int error;
234
235         NFSCL_REQSTART(nd, NFSPROC_NULL, vp);
236         error = nfscl_request(nd, vp, p, cred, NULL);
237         if (error == 0)
238                 error = nd->nd_repstat;
239         m_freem(nd->nd_mrep);
240         return (error);
241 }
242
243 /*
244  * nfs access check.
245  * Map the VFS access bits in acmode onto NFS ACCESS bits, ask the server via
246  * nfsrpc_accessrpc() and fail with EACCES unless every requested bit is set.
247  */
248 int
249 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
250     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
251 {
252         u_int32_t wanted, granted;
253         int isdir, rv;
254
255         isdir = (vnode_vtype(vp) == VDIR);
256         /* VREAD maps to the same bit for every vnode type. */
257         wanted = (acmode & VREAD) ? NFSACCESS_READ : 0;
258
259         /* Writing to a directory additionally asks for NFSACCESS_DELETE. */
260         if (acmode & VWRITE) {
261                 wanted |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
262                 if (isdir)
263                         wanted |= NFSACCESS_DELETE;
264         }
265
266         /* VEXEC means lookup for a directory, execute for anything else. */
267         if (acmode & VEXEC)
268                 wanted |= isdir ? NFSACCESS_LOOKUP : NFSACCESS_EXECUTE;
269
270         /*
271          * nfsrpc_accessrpc() performs the RPC itself and hands back the
272          * returned access bits in "granted".
273          */
274         rv = nfsrpc_accessrpc(vp, wanted, cred, p, nap, attrflagp, &granted,
275             NULL);
276         if (rv != 0)
277                 return (rv);
278
279         /*
280          * The returned bits might be a superset of the requested ones (the
281          * NFS V3 spec is not clear on this), so only insist that none of
282          * the requested bits is missing.
283          */
284         if ((granted & wanted) != wanted)
285                 return (EACCES);
286         return (0);
287 }
287
288 /*
289  * The actual rpc, separated out for Darwin.
     *
     * Builds an ACCESS request carrying the access bits in "mode" (plus a
     * GETATTR op when the request is flagged ND_NFSV4), sends it with
     * nfscl_request() and decodes the reply.
     *
     * nap/attrflagp are handed to nfscl_postop_attr() for the attributes in
     * the reply; *attrflagp is cleared on entry.  "stuff" is passed through
     * unchanged to nfscl_request() and nfscl_postop_attr().
     *
     * When the reply has nd_repstat == 0, *rmodep receives the returned access
     * bits masked by the reply's "supported" word (ND_NFSV4) or by "mode"
     * itself (otherwise).  *rmodep is not written on any other path.
     *
     * Returns 0, the error from nfscl_request() or nfscl_postop_attr(), or
     * nd_repstat.
290  */
291 int
292 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
293     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep,
294     void *stuff)
295 {
296         u_int32_t *tl;
297         u_int32_t supported, rmode;
298         int error;
299         struct nfsrv_descript nfsd, *nd = &nfsd;
300         nfsattrbit_t attrbits;
301
302         *attrflagp = 0;
            /* Mask used when the reply carries no separate "supported" word. */
303         supported = mode;
304         NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp);
            /* The only ACCESS argument built here is the requested bit mask. */
305         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
306         *tl = txdr_unsigned(mode);
307         if (nd->nd_flag & ND_NFSV4) {
308                 /*
309                  * And do a Getattr op.
310                  */
311                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
312                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
313                 NFSGETATTR_ATTRBIT(&attrbits);
314                 (void) nfsrv_putattrbit(nd, &attrbits);
315         }
316         error = nfscl_request(nd, vp, p, cred, stuff);
            /* A failed request returns directly, bypassing the m_freem() below. */
317         if (error)
318                 return (error);
            /* For ND_NFSV3 the attributes are decoded before nd_repstat is tested. */
319         if (nd->nd_flag & ND_NFSV3) {
320                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
321                 if (error)
322                         goto nfsmout;
323         }
324         if (!nd->nd_repstat) {
                    /*
                     * ND_NFSV4 replies carry two words (supported, then access);
                     * otherwise there is a single access word.
                     * NOTE(review): NFSM_DISSECT presumably sets error and jumps
                     * to nfsmout when the reply is too short - confirm against
                     * the macro definition.
                     */
325                 if (nd->nd_flag & ND_NFSV4) {
326                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
327                         supported = fxdr_unsigned(u_int32_t, *tl++);
328                 } else {
329                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
330                 }
331                 rmode = fxdr_unsigned(u_int32_t, *tl);
                    /*
                     * An error from nfscl_postop_attr() here is kept as the
                     * return value, but *rmodep is still stored below.
                     */
332                 if (nd->nd_flag & ND_NFSV4)
333                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
334
335                 /*
336                  * It's not obvious what should be done about
337                  * unsupported access modes. For now, be paranoid
338                  * and clear the unsupported ones.
339                  */
340                 rmode &= supported;
341                 *rmodep = rmode;
342         } else
343                 error = nd->nd_repstat;
344 nfsmout:
            /* Common exit once a reply exists: release the reply mbuf chain. */
345         m_freem(nd->nd_mrep);
346         return (error);
347 }
348
349 /*
350  * nfs open rpc
     *
     * Opens vp for the FREAD/FWRITE bits set in amode.  Any vnode type other
     * than VREG returns 0 without doing anything.
     *
     * Each pass of the loop calls nfscl_open() and, when that reports
     * NFSCLOPEN_DOOPEN, issues the Open through nfsrpc_openrpc() or
     * nfsrpc_getopenlayout().  The pass is repeated for the errors tested in
     * the while condition; the NFSERR_EXPIRED/NFSERR_BADSTATEID case is
     * additionally bounded by retrycnt.
     *
     * Returns 0 or an error.  An error that remains once retrycnt has reached
     * 4 is reported as EIO.
351  */
352 int
353 nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
354 {
355         struct nfsclopen *op;
356         struct nfscldeleg *dp;
357         struct nfsfh *nfhp;
358         struct nfsnode *np = VTONFS(vp);
359         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
360         u_int32_t mode, clidrev;
361         int ret, newone, error, expireret = 0, retrycnt;
362
363         /*
364          * For NFSv4, Open Ops are only done on Regular Files.
365          */
366         if (vnode_vtype(vp) != VREG)
367                 return (0);
            /* Translate the open flags into NFSv4 share access bits. */
368         mode = 0;
369         if (amode & FREAD)
370                 mode |= NFSV4OPEN_ACCESSREAD;
371         if (amode & FWRITE)
372                 mode |= NFSV4OPEN_ACCESSWRITE;
373         nfhp = np->n_fhp;
374
375         retrycnt = 0;
    /* Debug output; only compiled when "notdef" is defined. */
376 #ifdef notdef
377 { char name[100]; int namel;
378 namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
379 bcopy(NFS4NODENAME(np->n_v4), name, namel);
380 name[namel] = '\0';
381 printf("rpcopen p=0x%x name=%s",p->p_pid,name);
382 if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
383 else printf(" fhl=0\n");
384 }
385 #endif
386         do {
387             dp = NULL;
                /*
                 * nfscl_open() is given &op, &newone and &ret to fill in; a
                 * failure here ends the whole call.
                 */
388             error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
389                 cred, p, NULL, &op, &newone, &ret, 1);
390             if (error) {
391                 return (error);
392             }
                /*
                 * Snapshot nfsc_clientidrev for the nfscl_hasexpired() call
                 * below; 0 is used when the mount has no nm_clp.
                 */
393             if (nmp->nm_clp != NULL)
394                 clidrev = nmp->nm_clp->nfsc_clientidrev;
395             else
396                 clidrev = 0;
397             if (ret == NFSCLOPEN_DOOPEN) {
398                 if (np->n_v4 != NULL) {
399                         /*
400                          * For the first attempt, try and get a layout, if
401                          * pNFS is enabled for the mount.
402                          */
403                         if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
404                             nfs_numnfscbd == 0 ||
405                             (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
406                                 error = nfsrpc_openrpc(nmp, vp,
407                                     np->n_v4->n4_data,
408                                     np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
409                                     np->n_fhp->nfh_len, mode, op,
410                                     NFS4NODENAME(np->n_v4),
411                                     np->n_v4->n4_namelen,
412                                     &dp, 0, 0x0, cred, p, 0, 0);
413                         else
414                                 error = nfsrpc_getopenlayout(nmp, vp,
415                                     np->n_v4->n4_data,
416                                     np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
417                                     np->n_fhp->nfh_len, mode, op,
418                                     NFS4NODENAME(np->n_v4),
419                                     np->n_v4->n4_namelen, &dp, cred, p);
                            /* The Open handed back a delegation through dp. */
420                         if (dp != NULL) {
421 #ifdef APPLE
422                                 OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag);
423 #else
424                                 NFSLOCKNODE(np);
425                                 np->n_flag &= ~NDELEGMOD;
426                                 /*
427                                  * Invalidate the attribute cache, so that
428                                  * attributes that pre-date the issue of a
429                                  * delegation are not cached, since the
430                                  * cached attributes will remain valid while
431                                  * the delegation is held.
432                                  */
433                                 NFSINVALATTRCACHE(np);
434                                 NFSUNLOCKNODE(np);
435 #endif
436                                 (void) nfscl_deleg(nmp->nm_mountp,
437                                     op->nfso_own->nfsow_clp,
438                                     nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
439                         }
440                 } else {
                            /* The node has no n_v4 data to build the Open from. */
441                         error = EIO;
442                 }
                    /* Done for every NFSCLOPEN_DOOPEN pass, whatever error holds. */
443                 newnfs_copyincred(cred, &op->nfso_cred);
444             } else if (ret == NFSCLOPEN_SETCRED)
445                 /*
446                  * This is a new local open on a delegation. It needs
447                  * to have credentials so that an open can be done
448                  * against the server during recovery.
449                  */
450                 newnfs_copyincred(cred, &op->nfso_cred);
451
452             /*
453              * nfso_opencnt is the count of how many VOP_OPEN()s have
454              * been done on this Open successfully and a VOP_CLOSE()
455              * is expected for each of these.
456              * If error is non-zero, don't increment it, since the Open
457              * hasn't succeeded yet.
458              */
459             if (!error)
460                 op->nfso_opencnt++;
461             nfscl_openrelease(nmp, op, error, newone);
                /*
                 * The first group of errors waits in nfs_catnap() before the
                 * next pass.  NFSERR_EXPIRED/NFSERR_BADSTATEID instead call
                 * nfscl_hasexpired() and count the attempt in retrycnt, but
                 * only when clidrev is non-zero.
                 */
462             if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
463                 error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
464                 error == NFSERR_BADSESSION) {
465                 (void) nfs_catnap(PZERO, error, "nfs_open");
466             } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
467                 && clidrev != 0) {
468                 expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
469                 retrycnt++;
470             }
471         } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
472             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
473             error == NFSERR_BADSESSION ||
474             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
475              expireret == 0 && clidrev != 0 && retrycnt < 4));
            /* Any error left after four counted retries is reported as EIO. */
476         if (error && retrycnt >= 4)
477                 error = EIO;
478         return (error);
479 }
480
481 /*
482  * the actual open rpc
     *
     * Arguments, as they are used below:
     *  nfhp/fhlen       file handle handed to nfscl_reqstart() for the request
     *  newfhp/newfhlen  file handle copied into a delegation that comes back
     *                   and passed to nfsrpc_openconfirm()
     *  mode             share access bits, with the deny bits NFSLCK_SHIFT
     *                   positions higher
     *  op               open whose owner seqid/name are sent and whose stateid
     *                   is filled in from the reply
     *  name/namelen     component name, sent for the non-reclaim claim types
     *  dpp              in: delegation for a CLAIMDELEGATECUR open, or NULL;
     *                   out: newly allocated delegation when 0 is returned and
     *                   one was issued, otherwise NULL
     *  reclaim          non-zero selects CLAIMPREVIOUS carrying delegtype
     *  syscred          non-zero sets ND_USEGSSNAME on the request
     *  recursed         non-zero suppresses the second Open attempt made after
     *                   an OpenConfirm
     *
     * Returns 0, an error from sending or parsing, or nd_repstat.
     *
     * NOTE(review): the NFSM_DISSECT uses presumably set error and jump to
     * nfsmout when the reply is too short - confirm against the macro.
483  */
484 int
485 nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
486     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
487     u_int8_t *name, int namelen, struct nfscldeleg **dpp,
488     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
489     int syscred, int recursed)
490 {
491         u_int32_t *tl;
492         struct nfsrv_descript nfsd, *nd = &nfsd;
493         struct nfscldeleg *dp, *ndp = NULL;
494         struct nfsvattr nfsva;
495         u_int32_t rflags, deleg;
496         nfsattrbit_t attrbits;
497         int error, ret, acesize, limitby;
498         struct nfsclsession *tsep;
499
            /*
             * Take the caller's delegation (if any) and clear the output slot;
             * it is only set again at nfsmout when no error occurred.
             */
500         dp = *dpp;
501         *dpp = NULL;
502         nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
            /* Owner seqid, share access, share deny and the two clientid words. */
503         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
504         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
505         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
506         *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
507         tsep = nfsmnt_mdssession(nmp);
508         *tl++ = tsep->nfsess_clientid.lval[0];
509         *tl = tsep->nfsess_clientid.lval[1];
510         (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
            /* This is always a no-create Open; next comes the claim type. */
511         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
512         *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
513         if (reclaim) {
514                 *tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
515                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
516                 *tl = txdr_unsigned(delegtype);
517         } else {
518                 if (dp != NULL) {
                            /*
                             * Claim against the delegation passed in; its
                             * stateid seqid is sent as 0 when NFSHASNFSV4N()
                             * is true for the mount.
                             */
519                         *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
520                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
521                         if (NFSHASNFSV4N(nmp))
522                                 *tl++ = 0;
523                         else
524                                 *tl++ = dp->nfsdl_stateid.seqid;
525                         *tl++ = dp->nfsdl_stateid.other[0];
526                         *tl++ = dp->nfsdl_stateid.other[1];
527                         *tl = dp->nfsdl_stateid.other[2];
528                 } else {
529                         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
530                 }
                    /* Both non-reclaim claim types are followed by the name. */
531                 (void) nfsm_strtom(nd, name, namelen);
532         }
            /* Append a GETATTR asking for the change and modify-time attributes. */
533         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
534         *tl = txdr_unsigned(NFSV4OP_GETATTR);
535         NFSZERO_ATTRBIT(&attrbits);
536         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
537         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
538         (void) nfsrv_putattrbit(nd, &attrbits);
539         if (syscred)
540                 nd->nd_flag |= ND_USEGSSNAME;
541         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
542             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
            /* A failed request returns directly, bypassing nfsmout. */
543         if (error)
544                 return (error);
545         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
546         if (!nd->nd_repstat) {
                    /*
                     * The reply starts with the open stateid; rflags is read
                     * from the word 6 past the last stateid word.
                     */
547                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
548                     6 * NFSX_UNSIGNED);
549                 op->nfso_stateid.seqid = *tl++;
550                 op->nfso_stateid.other[0] = *tl++;
551                 op->nfso_stateid.other[1] = *tl++;
552                 op->nfso_stateid.other[2] = *tl;
553                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
554                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
555                 if (error)
556                         goto nfsmout;
557                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
558                 deleg = fxdr_unsigned(u_int32_t, *tl);
559                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
560                     deleg == NFSV4OPEN_DELEGATEWRITE) {
                            /*
                             * A delegation was issued: flag the client (first
                             * time only) and build a new nfscldeleg with room
                             * for the file handle after it.
                             */
561                         if (!(op->nfso_own->nfsow_clp->nfsc_flags &
562                               NFSCLFLAGS_FIRSTDELEG))
563                                 op->nfso_own->nfsow_clp->nfsc_flags |=
564                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
565                         ndp = malloc(
566                             sizeof (struct nfscldeleg) + newfhlen,
567                             M_NFSCLDELEG, M_WAITOK);
568                         LIST_INIT(&ndp->nfsdl_owner);
569                         LIST_INIT(&ndp->nfsdl_lock);
570                         ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
571                         ndp->nfsdl_fhlen = newfhlen;
572                         NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
573                         newnfs_copyincred(cred, &ndp->nfsdl_cred);
574                         nfscl_lockinit(&ndp->nfsdl_rwlock);
                            /* Delegation stateid, then one word kept in ret. */
575                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
576                             NFSX_UNSIGNED);
577                         ndp->nfsdl_stateid.seqid = *tl++;
578                         ndp->nfsdl_stateid.other[0] = *tl++;
579                         ndp->nfsdl_stateid.other[1] = *tl++;
580                         ndp->nfsdl_stateid.other[2] = *tl++;
581                         ret = fxdr_unsigned(int, *tl);
582                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
583                                 ndp->nfsdl_flags = NFSCLDL_WRITE;
584                                 /*
585                                  * Indicates how much the file can grow.
586                                  */
587                                 NFSM_DISSECT(tl, u_int32_t *,
588                                     3 * NFSX_UNSIGNED);
589                                 limitby = fxdr_unsigned(int, *tl++);
590                                 switch (limitby) {
591                                 case NFSV4OPEN_LIMITSIZE:
592                                         ndp->nfsdl_sizelimit = fxdr_hyper(tl);
593                                         break;
594                                 case NFSV4OPEN_LIMITBLOCKS:
                                            /* Limit is the product of the two words. */
595                                         ndp->nfsdl_sizelimit =
596                                             fxdr_unsigned(u_int64_t, *tl++);
597                                         ndp->nfsdl_sizelimit *=
598                                             fxdr_unsigned(u_int64_t, *tl);
599                                         break;
600                                 default:
601                                         error = NFSERR_BADXDR;
602                                         goto nfsmout;
603                                 }
604                         } else {
605                                 ndp->nfsdl_flags = NFSCLDL_READ;
606                         }
                            /* A non-zero word saved in ret marks the delegation RECALL. */
607                         if (ret)
608                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
609                         error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
610                             &acesize, p);
611                         if (error)
612                                 goto nfsmout;
613                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
614                         error = NFSERR_BADXDR;
615                         goto nfsmout;
616                 }
                    /*
                     * Two words are consumed here without being examined, then
                     * the attributes are loaded into nfsva.
                     */
617                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
618                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
619                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
620                     NULL, NULL, NULL, p, cred);
621                 if (error)
622                         goto nfsmout;
                    /* Record the change and modify-time values in the new delegation. */
623                 if (ndp != NULL) {
624                         ndp->nfsdl_change = nfsva.na_filerev;
625                         ndp->nfsdl_modtime = nfsva.na_mtime;
626                         ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
627                 }
                    /*
                     * The server asked for an OpenConfirm: repeat it while it
                     * returns NFSERR_DELAY; its final result becomes error.
                     */
628                 if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
629                     do {
630                         ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
631                             cred, p);
632                         if (ret == NFSERR_DELAY)
633                             (void) nfs_catnap(PZERO, ret, "nfs_open");
634                     } while (ret == NFSERR_DELAY);
635                     error = ret;
636                 }
637                 if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
638                     nfscl_assumeposixlocks)
639                     op->nfso_posixlock = 1;
640                 else
641                     op->nfso_posixlock = 0;
642
643                 /*
644                  * If the server is handing out delegations, but we didn't
645                  * get one because an OpenConfirm was required, try the
646                  * Open again, to get a delegation. This is a harmless no-op,
647                  * from a server's point of view.
648                  */
649                 if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
650                     (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
651                     && !error && dp == NULL && ndp == NULL && !recursed) {
                        /*
                         * The nested call passes recursed = 1, so it cannot
                         * recurse again.  Of its failures, only the three
                         * tested below are propagated to the caller.
                         */
652                     do {
653                         ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
654                             newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
655                             cred, p, syscred, 1);
656                         if (ret == NFSERR_DELAY)
657                             (void) nfs_catnap(PZERO, ret, "nfs_open2");
658                     } while (ret == NFSERR_DELAY);
659                     if (ret) {
660                         if (ndp != NULL) {
661                                 free(ndp, M_NFSCLDELEG);
662                                 ndp = NULL;
663                         }
664                         if (ret == NFSERR_STALECLIENTID ||
665                             ret == NFSERR_STALEDONTRECOVER ||
666                             ret == NFSERR_BADSESSION)
667                                 error = ret;
668                     }
669                 }
670         }
671         if (nd->nd_repstat != 0 && error == 0)
672                 error = nd->nd_repstat;
673         if (error == NFSERR_STALECLIENTID)
674                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
675 nfsmout:
            /*
             * Hand the new delegation to the caller only when no error
             * occurred; otherwise free it.  The reply mbufs are always freed.
             */
676         if (!error)
677                 *dpp = ndp;
678         else if (ndp != NULL)
679                 free(ndp, M_NFSCLDELEG);
680         m_freem(nd->nd_mrep);
681         return (error);
682 }
683
684 /*
685  * open downgrade rpc
     *
     * Builds and sends an OPENDOWNGRADE request for vp.  The request carries
     * the stateid held in op (its seqid is sent as 0 when NFSHASNFSV4N() is
     * true for the mount), the open owner's seqid, the access bits
     * (mode & NFSV4OPEN_ACCESSBOTH) and the deny bits
     * ((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH).
     *
     * When the reply status is 0, the stateid returned by the server is
     * copied back into op->nfso_stateid.
     *
     * Returns the error from nfscl_request() if the request could not be
     * done, otherwise the reply status nd_repstat (0 on success).  A result
     * of NFSERR_STALESTATEID also starts recovery for the owning client.
686  */
687 int
688 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
689     struct ucred *cred, NFSPROC_T *p)
690 {
691         u_int32_t *tl;
692         struct nfsrv_descript nfsd, *nd = &nfsd;
693         int error;
694
695         NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp);
696         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
697         if (NFSHASNFSV4N(VFSTONFS(vp->v_mount)))
698                 *tl++ = 0;
699         else
700                 *tl++ = op->nfso_stateid.seqid;
701         *tl++ = op->nfso_stateid.other[0];
702         *tl++ = op->nfso_stateid.other[1];
703         *tl++ = op->nfso_stateid.other[2];
704         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
705         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
706         *tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
707         error = nfscl_request(nd, vp, p, cred, NULL);
708         if (error)
709                 return (error);
710         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
711         if (!nd->nd_repstat) {
                /*
                 * NOTE(review): NFSM_DISSECT presumably sets error and jumps
                 * to nfsmout on a short reply; nothing else references that
                 * label -- confirm against the macro definition.
                 */
712                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
713                 op->nfso_stateid.seqid = *tl++;
714                 op->nfso_stateid.other[0] = *tl++;
715                 op->nfso_stateid.other[1] = *tl++;
716                 op->nfso_stateid.other[2] = *tl;
717         }
718         if (nd->nd_repstat && error == 0)
719                 error = nd->nd_repstat;
720         if (error == NFSERR_STALESTATEID)
721                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
722 nfsmout:
723         m_freem(nd->nd_mrep);
724         return (error);
725 }
726
727 /*
728  * V4 Close operation.
     *
     * Does nothing and returns 0 unless vp is a regular file (VREG).
     * Otherwise calls nfscl_doclose() when doclose is non-zero, or
     * nfscl_getclose() when it is zero.  If that call fails its error is
     * returned; on success the client structure it handed back in clp is
     * released with nfscl_clientrelease() and 0 is returned.
729  */
730 int
731 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
732 {
733         struct nfsclclient *clp;
734         int error;
735
736         if (vnode_vtype(vp) != VREG)
737                 return (0);
738         if (doclose)
739                 error = nfscl_doclose(vp, &clp, p);
740         else
741                 error = nfscl_getclose(vp, &clp);
742         if (error)
743                 return (error);
744
745         nfscl_clientrelease(clp);
746         return (0);
747 }
748
749 /*
750  * Close the open.
     *
     * Steps, in order:
     *  1. Take a scratch credential and copy the open's credential
     *     (op->nfso_cred) into it.
     *  2. For every lock owner on op->nfso_lock, unlock its byte range locks
     *     via nfsrpc_locku() and free each lock record.  When
     *     op->nfso_posixlock is set, a single unlock of the range
     *     (0, NFS64BITSSET) is done per lock owner; otherwise each lock's
     *     own range and type are used.  An unlock is retried, after a
     *     nfs_catnap(), while the reply is NFSERR_GRACE or NFSERR_DELAY,
     *     bounded by trycnt.
     *  3. Do a ReleaseLockOwner for each lock owner.
     *  4. With the open owner's rwlock held exclusively, call
     *     nfscl_tryclose() until it stops returning NFSERR_GRACE.
     *  5. Free the lock owners and the open itself, then free the scratch
     *     credential.
     *
     * Errors from the RPCs are not reported to the caller (void return).
751  */
752 void
753 nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p)
754 {
755         struct nfsrv_descript nfsd, *nd = &nfsd;
756         struct nfscllockowner *lp, *nlp;
757         struct nfscllock *lop, *nlop;
758         struct ucred *tcred;
759         u_int64_t off = 0, len = 0;
760         u_int32_t type = NFSV4LOCKT_READ;
761         int error, do_unlock, trycnt;
762
763         tcred = newnfs_getcred();
764         newnfs_copycred(&op->nfso_cred, tcred);
765         /*
766          * (Theoretically this could be done in the same
767          *  compound as the close, but having multiple
768          *  sequenced Ops in the same compound might be
769          *  too scary for some servers.)
770          */
771         if (op->nfso_posixlock) {
772                 off = 0;
773                 len = NFS64BITSSET;
774                 type = NFSV4LOCKT_READ;
775         }
776
777         /*
778          * Since this function is only called from VOP_INACTIVE(), no
779          * other thread will be manipulating this Open. As such, the
780          * lock lists are not being changed by other threads, so it should
781          * be safe to do this without locking.
782          */
783         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
784                 do_unlock = 1;
785                 LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
786                         if (op->nfso_posixlock == 0) {
787                                 off = lop->nfslo_first;
788                                 len = lop->nfslo_end - lop->nfslo_first;
789                                 if (lop->nfslo_type == F_WRLCK)
790                                         type = NFSV4LOCKT_WRITE;
791                                 else
792                                         type = NFSV4LOCKT_READ;
793                         }
794                         if (do_unlock) {
795                                 trycnt = 0;
796                                 do {
797                                         error = nfsrpc_locku(nd, nmp, lp, off,
798                                             len, type, tcred, p, 0);
799                                         if ((nd->nd_repstat == NFSERR_GRACE ||
800                                             nd->nd_repstat == NFSERR_DELAY) &&
801                                             error == 0)
802                                                 (void) nfs_catnap(PZERO,
803                                                     (int)nd->nd_repstat,
804                                                     "nfs_close");
805                                 } while ((nd->nd_repstat == NFSERR_GRACE ||
806                                     nd->nd_repstat == NFSERR_DELAY) &&
807                                     error == 0 && trycnt++ < 5);
                                    /*
                                     * For POSIX locks the whole-file unlock
                                     * above covers every lock of this owner,
                                     * so do not send it again.
                                     */
808                                 if (op->nfso_posixlock)
809                                         do_unlock = 0;
810                         }
811                         nfscl_freelock(lop, 0);
812                 }
813                 /*
814                  * Do a ReleaseLockOwner.
815                  * The lock owner name nfsl_owner may be used by other opens for
816                  * other files but the lock_owner4 name that nfsrpc_rellockown()
817                  * puts on the wire has the file handle for this file appended
818                  * to it, so it can be done now.
819                  */
820                 (void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
821                     lp->nfsl_open->nfso_fhlen, tcred, p);
822         }
823
824         /*
825          * There could be other Opens for different files on the same
826          * OpenOwner, so locking is required.
827          */
828         NFSLOCKCLSTATE();
829         nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
830         NFSUNLOCKCLSTATE();
831         do {
832                 error = nfscl_tryclose(op, tcred, nmp, p);
833                 if (error == NFSERR_GRACE)
834                         (void) nfs_catnap(PZERO, error, "nfs_close");
835         } while (error == NFSERR_GRACE);
836         NFSLOCKCLSTATE();
837         nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
838
            /* Still under NFSLOCKCLSTATE(): tear down the lock owners and open. */
839         LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
840                 nfscl_freelockowner(lp, 0);
841         nfscl_freeopen(op, 0);
842         NFSUNLOCKCLSTATE();
843         NFSFREECRED(tcred);
844 }
845
846 /*
847  * The actual Close RPC.
     *
     * Fills in the caller supplied nd with a CLOSE request for the file
     * handle in op: the open owner's seqid followed by the open's stateid
     * (stateid seqid sent as 0 when NFSHASNFSV4N(nmp) is true).  A non-zero
     * syscred sets ND_USEGSSNAME on the request.
     *
     * Returns the error from newnfs_request() if the request could not be
     * done, otherwise the reply status nd_repstat (0 on success).  On a
     * status of 0 a stateid-sized chunk of the reply is dissected but its
     * contents are not used.  NFSERR_STALESTATEID also starts recovery for
     * the owning client.  The reply mbuf chain is freed before returning
     * on the paths that reach nfsmout.
848  */
849 int
850 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
851     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
852     int syscred)
853 {
854         u_int32_t *tl;
855         int error;
856
857         nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
858             op->nfso_fhlen, NULL, NULL, 0, 0);
859         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
860         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
861         if (NFSHASNFSV4N(nmp))
862                 *tl++ = 0;
863         else
864                 *tl++ = op->nfso_stateid.seqid;
865         *tl++ = op->nfso_stateid.other[0];
866         *tl++ = op->nfso_stateid.other[1];
867         *tl = op->nfso_stateid.other[2];
868         if (syscred)
869                 nd->nd_flag |= ND_USEGSSNAME;
870         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
871             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
872         if (error)
873                 return (error);
874         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
            /*
             * NOTE(review): NFSM_DISSECT presumably jumps to nfsmout with
             * error set when the reply is too short -- confirm against the
             * macro definition.
             */
875         if (nd->nd_repstat == 0)
876                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
877         error = nd->nd_repstat;
878         if (error == NFSERR_STALESTATEID)
879                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
880 nfsmout:
881         m_freem(nd->nd_mrep);
882         return (error);
883 }
884
885 /*
886  * V4 Open Confirm RPC.
     *
     * Returns 0 without doing an RPC when NFSHASNFSV4N() is true for the
     * mount.  Otherwise sends an OPENCONFIRM for the file handle
     * nfhp/fhlen carrying the stateid currently in op followed by the open
     * owner's seqid.  When the reply status is 0, the stateid returned by
     * the server replaces op->nfso_stateid.
     *
     * Returns the error from nfscl_request() if the request could not be
     * done, otherwise the reply status nd_repstat.  NFSERR_STALESTATEID
     * also starts recovery for the owning client.
887  */
888 int
889 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
890     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
891 {
892         u_int32_t *tl;
893         struct nfsrv_descript nfsd, *nd = &nfsd;
894         struct nfsmount *nmp;
895         int error;
896
897         nmp = VFSTONFS(vp->v_mount);
898         if (NFSHASNFSV4N(nmp))
899                 return (0);             /* No confirmation for NFSv4.1. */
900         nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL,
901             0, 0);
902         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
903         *tl++ = op->nfso_stateid.seqid;
904         *tl++ = op->nfso_stateid.other[0];
905         *tl++ = op->nfso_stateid.other[1];
906         *tl++ = op->nfso_stateid.other[2];
907         *tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
908         error = nfscl_request(nd, vp, p, cred, NULL);
909         if (error)
910                 return (error);
911         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
912         if (!nd->nd_repstat) {
913                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
914                 op->nfso_stateid.seqid = *tl++;
915                 op->nfso_stateid.other[0] = *tl++;
916                 op->nfso_stateid.other[1] = *tl++;
917                 op->nfso_stateid.other[2] = *tl;
918         }
919         error = nd->nd_repstat;
920         if (error == NFSERR_STALESTATEID)
921                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
922 nfsmout:
923         m_freem(nd->nd_mrep);
924         return (error);
925 }
926
927 /*
928  * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
929  * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
     *
     * When NFSHASNFSV4N(nmp) is true (session based mount):
     *  - If retokp is non-NULL and the mount already has a session, a
     *    CreateSession is first tried with the existing ClientID.  On
     *    success *retokp is set to true.  If it fails and *retokp was already
     *    true, NFSERR_IO is returned.
     *  - If retokp is non-NULL, *retokp is true and there is no existing
     *    session, NFSERR_IO is returned.
     *  - Otherwise an ExchangeID followed by a CreateSession is done.
     *  - On success the new session is put at the head of nmp->nm_sess and
     *    waiters on the old and new sessions are woken up.  When reclaim is
     *    0 a ReclaimComplete is then done, with NFSERR_COMPLETEALREADY and
     *    NFSERR_NOTSUPP treated as success.
     *
     * Otherwise (see the NFSv4.0 comment in the body):
     *  - Returns NFSERR_IO right away if retokp is non-NULL and *retokp is
     *    true.
     *  - Allocates a session structure, puts it at the head of nmp->nm_sess
     *    and does SETCLIENTID, then SETCLIENTID_CONFIRM, then a GETATTR for
     *    the lease time.  On full success the client's renew/expire times are
     *    set from the lease and nfsc_clientidrev is bumped, skipping 0.
     *
     * Returns 0 on success, an error from the transport/attribute parsing, or
     * the status of the last reply.
930  */
931 int
932 nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
933     bool *retokp, struct ucred *cred, NFSPROC_T *p)
934 {
935         u_int32_t *tl;
936         struct nfsrv_descript nfsd;
937         struct nfsrv_descript *nd = &nfsd;
938         nfsattrbit_t attrbits;
939         u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
940         u_short port;
941         int error, isinet6 = 0, callblen;
942         nfsquad_t confirm;
943         u_int32_t lease;
944         static u_int32_t rev = 0;
945         struct nfsclds *dsp, *odsp;
946         struct in6_addr a6;
947         struct nfsclsession *tsep;
948
949         if (nfsboottime.tv_sec == 0)
950                 NFSSETBOOTTIME(nfsboottime);
951         if (NFSHASNFSV4N(nmp)) {
                    /*
                     * Start with a non-zero error so that the ExchangeID path
                     * below runs unless the CreateSession with the extant
                     * ClientID succeeds.
                     */
952                 error = NFSERR_BADSESSION;
953                 odsp = dsp = NULL;
954                 if (retokp != NULL) {
955                         NFSLOCKMNT(nmp);
956                         odsp = TAILQ_FIRST(&nmp->nm_sess);
957                         NFSUNLOCKMNT(nmp);
958                 }
                    /* odsp can only be non-NULL here when retokp is non-NULL. */
959                 if (odsp != NULL) {
960                         /*
961                          * When a session already exists, first try a
962                          * CreateSession with the extant ClientID.
963                          */
964                         dsp = malloc(sizeof(struct nfsclds) +
965                             odsp->nfsclds_servownlen + 1, M_NFSCLDS,
966                             M_WAITOK | M_ZERO);
967                         dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
968                         dsp->nfsclds_servownlen = odsp->nfsclds_servownlen;
969                         dsp->nfsclds_sess.nfsess_clientid =
970                             odsp->nfsclds_sess.nfsess_clientid;
971                         dsp->nfsclds_sess.nfsess_sequenceid =
972                             odsp->nfsclds_sess.nfsess_sequenceid;
973                         dsp->nfsclds_flags = odsp->nfsclds_flags;
974                         if (dsp->nfsclds_servownlen > 0)
975                                 memcpy(dsp->nfsclds_serverown,
976                                     odsp->nfsclds_serverown,
977                                     dsp->nfsclds_servownlen + 1);
978                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
979                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
980                             NULL, MTX_DEF);
981                         nfscl_initsessionslots(&dsp->nfsclds_sess);
982                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
983                             &nmp->nm_sockreq, NULL,
984                             dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
985                         NFSCL_DEBUG(1, "create session for extant "
986                             "ClientID=%d\n", error);
987                         if (error != 0) {
988                                 nfscl_freenfsclds(dsp);
989                                 dsp = NULL;
990                                 /*
991                                  * If *retokp is true, return any error other
992                                  * than NFSERR_STALECLIENTID,
993                                  * NFSERR_BADSESSION or NFSERR_STALEDONTRECOVER
994                                  * so that nfscl_recover() will not loop.
995                                  */
996                                 if (*retokp)
997                                         return (NFSERR_IO);
998                         } else
999                                 *retokp = true;
1000                 } else if (retokp != NULL && *retokp)
1001                         return (NFSERR_IO);
1002                 if (error != 0) {
1003                         /*
1004                          * Either there was no previous session or the
1005                          * CreateSession attempt failed, so...
1006                          * do an ExchangeID followed by the CreateSession.
1007                          */
1008                         clp->nfsc_rev = rev++;
1009                         error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, 0,
1010                             NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp,
1011                             cred, p);
1012                         NFSCL_DEBUG(1, "aft exch=%d\n", error);
1013                         if (error == 0)
1014                                 error = nfsrpc_createsession(nmp,
1015                                     &dsp->nfsclds_sess, &nmp->nm_sockreq, NULL,
1016                                     dsp->nfsclds_sess.nfsess_sequenceid, 1,
1017                                     cred, p);
1018                         NFSCL_DEBUG(1, "aft createsess=%d\n", error);
1019                 }
1020                 if (error == 0) {
1021                         NFSLOCKMNT(nmp);
1022                         /*
1023                          * The old sessions cannot be safely free'd
1024                          * here, since they may still be used by
1025                          * in-progress RPCs.
1026                          */
1027                         tsep = NULL;
1028                         if (TAILQ_FIRST(&nmp->nm_sess) != NULL)
1029                                 tsep = NFSMNT_MDSSESSION(nmp);
1030                         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
1031                             nfsclds_list);
1032                         /*
1033                          * Wake up RPCs waiting for a slot on the
1034                          * old session. These will then fail with
1035                          * NFSERR_BADSESSION and be retried with the
1036                          * new session by nfsv4_setsequence().
1037                          * Also wakeup() processes waiting for the
1038                          * new session.
1039                          */
1040                         if (tsep != NULL)
1041                                 wakeup(&tsep->nfsess_slots);
1042                         wakeup(&nmp->nm_sess);
1043                         NFSUNLOCKMNT(nmp);
1044                 } else if (dsp != NULL)
1045                         nfscl_freenfsclds(dsp);
1046                 if (error == 0 && reclaim == 0) {
1047                         error = nfsrpc_reclaimcomplete(nmp, cred, p);
1048                         NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
1049                         if (error == NFSERR_COMPLETEALREADY ||
1050                             error == NFSERR_NOTSUPP)
1051                                 /* Ignore this error. */
1052                                 error = 0;
1053                 }
1054                 return (error);
1055         } else if (retokp != NULL && *retokp)
1056                 return (NFSERR_IO);
1057         clp->nfsc_rev = rev++;
1058
1059         /*
1060          * Allocate a single session structure for NFSv4.0, because some of
1061          * the fields are used by NFSv4.0 although it doesn't do a session.
1062          */
1063         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
1064         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
1065         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
1066         NFSLOCKMNT(nmp);
1067         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
1068         tsep = NFSMNT_MDSSESSION(nmp);
1069         NFSUNLOCKMNT(nmp);
1070
             /*
              * SETCLIENTID arguments: two words built from the boot time
              * seconds and this client's nfsc_rev, then the client id string.
              */
1071         nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0);
1072         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1073         *tl++ = txdr_unsigned(nfsboottime.tv_sec);
1074         *tl = txdr_unsigned(clp->nfsc_rev);
1075         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
1076
1077         /*
1078          * set up the callback address
              *
              * The address string is either nfsv4_callbackaddr, when that is
              * non-empty, or the result of nfscl_getmyip(), with the two bytes
              * of the network order callback port appended as ".%d.%d".  When
              * callbacks are not enabled, no nfscbd is running or no address
              * is available, "tcp" and "0.0.0.0.0.0" are sent instead.
1079          */
1080         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1081         *tl = txdr_unsigned(NFS_CALLBCKPROG);
1082         callblen = strlen(nfsv4_callbackaddr);
1083         if (callblen == 0)
1084                 cp = nfscl_getmyip(nmp, &a6, &isinet6);
1085         if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
1086             (callblen > 0 || cp != NULL)) {
1087                 port = htons(nfsv4_cbport);
1088                 cp2 = (u_int8_t *)&port;
1089 #ifdef INET6
1090                 if ((callblen > 0 &&
1091                      strchr(nfsv4_callbackaddr, ':')) || isinet6) {
1092                         char ip6buf[INET6_ADDRSTRLEN], *ip6add;
1093
1094                         (void) nfsm_strtom(nd, "tcp6", 4);
1095                         if (callblen == 0) {
1096                                 ip6_sprintf(ip6buf, (struct in6_addr *)cp);
1097                                 ip6add = ip6buf;
1098                         } else {
1099                                 ip6add = nfsv4_callbackaddr;
1100                         }
1101                         snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
1102                             ip6add, cp2[0], cp2[1]);
1103                 } else
1104 #endif
1105                 {
1106                         (void) nfsm_strtom(nd, "tcp", 3);
1107                         if (callblen == 0)
1108                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1109                                     "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
1110                                     cp[2], cp[3], cp2[0], cp2[1]);
1111                         else
1112                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1113                                     "%s.%d.%d", nfsv4_callbackaddr,
1114                                     cp2[0], cp2[1]);
1115                 }
1116                 (void) nfsm_strtom(nd, addr, strlen(addr));
1117         } else {
1118                 (void) nfsm_strtom(nd, "tcp", 3);
1119                 (void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
1120         }
1121         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1122         *tl = txdr_unsigned(clp->nfsc_cbident);
1123         nd->nd_flag |= ND_USEGSSNAME;
1124         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1125                 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1126         if (error)
1127                 return (error);
1128         if (nd->nd_repstat == 0) {
                 /*
                  * The reply holds the two word client id followed by the
                  * two word confirm value; both are echoed back below.
                  */
1129             NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1130             tsep->nfsess_clientid.lval[0] = *tl++;
1131             tsep->nfsess_clientid.lval[1] = *tl++;
1132             confirm.lval[0] = *tl++;
1133             confirm.lval[1] = *tl;
1134             m_freem(nd->nd_mrep);
1135             nd->nd_mrep = NULL;
1136
1137             /*
1138              * and confirm it.
1139              */
1140             nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
1141                 NULL, 0, 0);
1142             NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1143             *tl++ = tsep->nfsess_clientid.lval[0];
1144             *tl++ = tsep->nfsess_clientid.lval[1];
1145             *tl++ = confirm.lval[0];
1146             *tl = confirm.lval[1];
1147             nd->nd_flag |= ND_USEGSSNAME;
1148             error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1149                 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1150             if (error)
1151                 return (error);
1152             m_freem(nd->nd_mrep);
1153             nd->nd_mrep = NULL;
1154             if (nd->nd_repstat == 0) {
                     /* Confirmed: fetch the lease time with a GETATTR. */
1155                 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh,
1156                     nmp->nm_fhsize, NULL, NULL, 0, 0);
1157                 NFSZERO_ATTRBIT(&attrbits);
1158                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1159                 (void) nfsrv_putattrbit(nd, &attrbits);
1160                 nd->nd_flag |= ND_USEGSSNAME;
1161                 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1162                     cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1163                 if (error)
1164                     return (error);
1165                 if (nd->nd_repstat == 0) {
1166                     error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL,
1167                         NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred);
1168                     if (error)
1169                         goto nfsmout;
1170                     clp->nfsc_renew = NFSCL_RENEW(lease);
1171                     clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
                         /* Bump the client id revision, never leaving it at 0. */
1172                     clp->nfsc_clientidrev++;
1173                     if (clp->nfsc_clientidrev == 0)
1174                         clp->nfsc_clientidrev++;
1175                 }
1176             }
1177         }
1178         error = nd->nd_repstat;
1179 nfsmout:
1180         m_freem(nd->nd_mrep);
1181         return (error);
1182 }
1183
1184 /*
1185  * nfs getattr call.
      *
      * Sends a GETATTR for vp.  For NFSv4 requests (ND_NFSV4 set) the
      * attribute bitmap from NFSGETATTR_ATTRBIT() is added to the request.
      * On a reply status of 0 the attributes are loaded into nap with
      * nfsm_loadattr().
      *
      * Returns the error from nfscl_request() if the request could not be
      * done, the result of nfsm_loadattr() on a successful reply, or the
      * non-zero reply status otherwise.  "stuff" is passed through to
      * nfscl_request() untouched.
1186  */
1187 int
1188 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1189     struct nfsvattr *nap, void *stuff)
1190 {
1191         struct nfsrv_descript nfsd, *nd = &nfsd;
1192         int error;
1193         nfsattrbit_t attrbits;
1194
1195         NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
1196         if (nd->nd_flag & ND_NFSV4) {
1197                 NFSGETATTR_ATTRBIT(&attrbits);
1198                 (void) nfsrv_putattrbit(nd, &attrbits);
1199         }
1200         error = nfscl_request(nd, vp, p, cred, stuff);
1201         if (error)
1202                 return (error);
1203         if (!nd->nd_repstat)
1204                 error = nfsm_loadattr(nd, nap);
1205         else
1206                 error = nd->nd_repstat;
1207         m_freem(nd->nd_mrep);
1208         return (error);
1209 }
1210
1211 /*
1212  * nfs getattr call with non-vnode arguments.
      *
      * Sends a GETATTR for the file handle fhp/fhlen on mount nmp.  The RPC
      * version passed to newnfs_request() is NFS_VER4 when ND_NFSV4 is set on
      * the request, NFS_VER3 when ND_NFSV3 is set and NFS_VER2 otherwise.
      * For NFSv4 the lease time attribute is requested in addition to the
      * attributes from NFSGETATTR_ATTRBIT(), and leasep is handed to
      * nfsv4_loadattr() when the reply is parsed.
      *
      * A non-zero syscred sets ND_USEGSSNAME on the request.  xidp is passed
      * through to newnfs_request().
      *
      * Returns the error from newnfs_request() if the request could not be
      * done, the result of loading the attributes into nap on a reply status
      * of 0, or the non-zero reply status otherwise.
1213  */
1214 int
1215 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1216     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1217     uint32_t *leasep)
1218 {
1219         struct nfsrv_descript nfsd, *nd = &nfsd;
1220         int error, vers = NFS_VER2;
1221         nfsattrbit_t attrbits;
1222
1223         nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0);
1224         if (nd->nd_flag & ND_NFSV4) {
1225                 vers = NFS_VER4;
1226                 NFSGETATTR_ATTRBIT(&attrbits);
1227                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1228                 (void) nfsrv_putattrbit(nd, &attrbits);
1229         } else if (nd->nd_flag & ND_NFSV3) {
1230                 vers = NFS_VER3;
1231         }
1232         if (syscred)
1233                 nd->nd_flag |= ND_USEGSSNAME;
1234         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1235             NFS_PROG, vers, NULL, 1, xidp, NULL);
1236         if (error)
1237                 return (error);
1238         if (nd->nd_repstat == 0) {
1239                 if ((nd->nd_flag & ND_NFSV4) != 0)
1240                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1241                             NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1242                             NULL, NULL);
1243                 else
1244                         error = nfsm_loadattr(nd, nap);
1245         } else
1246                 error = nd->nd_repstat;
1247         m_freem(nd->nd_mrep);
1248         return (error);
1249 }
1250
1251 /*
1252  * Do an nfs setattr operation.
      *
      * When vap is non-NULL the attributes in vap are set via
      * nfsrpc_setattrrpc(); when vap is NULL the ACL aclp is set via
      * nfsrpc_setaclrpc().
      *
      * For NFSv4 mounts a stateid is looked up first.  The open mode asked
      * for is write access when vap sets va_size and read access otherwise.
      * If no stateid is found for a regular file, and either write access is
      * wanted or nfstest_openallsetattr is set, the file is opened for the
      * duration of the RPC and closed again afterwards.
      *
      * The whole sequence is retried:
      *  - without a count limit for NFSERR_GRACE, NFSERR_STALESTATEID,
      *    NFSERR_STALEDONTRECOVER, NFSERR_DELAY and NFSERR_BADSESSION
      *    (after a nfs_catnap()),
      *  - while retrycnt < 20 for NFSERR_OLDSTATEID,
      *  - while retrycnt < 4 for NFSERR_EXPIRED/NFSERR_BADSTATEID (only when
      *    a client id revision is known and nfscl_hasexpired() returned 0)
      *    and for NFSERR_OPENMODE in read mode.
      *
      * Returns 0 on success.  Any error still pending once retrycnt has
      * reached 4 or more is mapped to EIO; otherwise the error is returned
      * as is.
1253  */
1254 int
1255 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1256     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
1257     void *stuff)
1258 {
1259         int error, expireret = 0, openerr, retrycnt;
1260         u_int32_t clidrev = 0, mode;
1261         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1262         struct nfsfh *nfhp;
1263         nfsv4stateid_t stateid;
1264         void *lckp;
1265
1266         if (nmp->nm_clp != NULL)
1267                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1268         if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1269                 mode = NFSV4OPEN_ACCESSWRITE;
1270         else
1271                 mode = NFSV4OPEN_ACCESSREAD;
1272         retrycnt = 0;
1273         do {
1274                 lckp = NULL;
                     /* Non-zero means "no temporary open to close below". */
1275                 openerr = 1;
1276                 if (NFSHASNFSV4(nmp)) {
1277                         nfhp = VTONFS(vp)->n_fhp;
1278                         error = nfscl_getstateid(vp, nfhp->nfh_fh,
1279                             nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1280                         if (error && vnode_vtype(vp) == VREG &&
1281                             (mode == NFSV4OPEN_ACCESSWRITE ||
1282                              nfstest_openallsetattr)) {
1283                                 /*
1284                                  * No Open stateid, so try and open the file
1285                                  * now.
1286                                  */
1287                                 if (mode == NFSV4OPEN_ACCESSWRITE)
1288                                         openerr = nfsrpc_open(vp, FWRITE, cred,
1289                                             p);
1290                                 else
1291                                         openerr = nfsrpc_open(vp, FREAD, cred,
1292                                             p);
1293                                 if (!openerr)
1294                                         (void) nfscl_getstateid(vp,
1295                                             nfhp->nfh_fh, nfhp->nfh_len,
1296                                             mode, 0, cred, p, &stateid, &lckp);
1297                         }
1298                 }
1299                 if (vap != NULL)
1300                         error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1301                             rnap, attrflagp, stuff);
1302                 else
1303                         error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
1304                             stuff);
                     /*
                      * Record on the mount that a read-mode stateid was
                      * rejected with NFSERR_OPENMODE.
                      */
1305                 if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1306                         NFSLOCKMNT(nmp);
1307                         nmp->nm_state |= NFSSTA_OPENMODE;
1308                         NFSUNLOCKMNT(nmp);
1309                 }
1310                 if (error == NFSERR_STALESTATEID)
1311                         nfscl_initiate_recovery(nmp->nm_clp);
1312                 if (lckp != NULL)
1313                         nfscl_lockderef(lckp);
1314                 if (!openerr)
1315                         (void) nfsrpc_close(vp, 0, p);
1316                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1317                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1318                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1319                         (void) nfs_catnap(PZERO, error, "nfs_setattr");
1320                 } else if ((error == NFSERR_EXPIRED ||
1321                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1322                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1323                 }
1324                 retrycnt++;
1325         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1326             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1327             error == NFSERR_BADSESSION ||
1328             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1329             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1330              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1331             (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1332              retrycnt < 4));
1333         if (error && retrycnt >= 4)
1334                 error = EIO;
1335         return (error);
1336 }
1337
/*
 * Do the actual Setattr RPC for nfsrpc_setattr().
 *
 * Clears *attrflagp, then builds a SETATTR request for vp from vap.
 * vap->va_type is overwritten with the vnode's type before the attributes
 * are encoded.  For NFSv4 the stateid in stateidp is put in the request and
 * a GETATTR operation is appended; for NFSv3 a single newnfs_false word is
 * appended after the attributes.
 *
 * The reply is parsed with nfscl_wcc_data() for NFSv3 and NFSv4, the
 * returned attribute bitmap is consumed for NFSv4 replies that have more
 * data (ND_NOMOREDATA clear), and for non-NFSv3 replies with a status of 0
 * the post-op attributes are loaded into rnap/attrflagp.
 *
 * Returns the error from nfscl_request() if the request could not be done,
 * otherwise the first parse error, otherwise the reply status nd_repstat.
 */
1338 static int
1339 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1340     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1341     struct nfsvattr *rnap, int *attrflagp, void *stuff)
1342 {
1343         u_int32_t *tl;
1344         struct nfsrv_descript nfsd, *nd = &nfsd;
1345         int error;
1346         nfsattrbit_t attrbits;
1347
1348         *attrflagp = 0;
1349         NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp);
1350         if (nd->nd_flag & ND_NFSV4)
1351                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1352         vap->va_type = vnode_vtype(vp);
1353         nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1354         if (nd->nd_flag & ND_NFSV3) {
                     /*
                      * NOTE(review): presumably the NFSv3 "guard" check flag,
                      * sent as false -- confirm against the protocol spec.
                      */
1355                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1356                 *tl = newnfs_false;
1357         } else if (nd->nd_flag & ND_NFSV4) {
1358                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1359                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1360                 NFSGETATTR_ATTRBIT(&attrbits);
1361                 (void) nfsrv_putattrbit(nd, &attrbits);
1362         }
1363         error = nfscl_request(nd, vp, p, cred, stuff);
1364         if (error)
1365                 return (error);
1366         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1367                 error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff);
1368         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1369                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1370         if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1371                 error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
1372         m_freem(nd->nd_mrep);
             /* A parse error takes precedence over the server's status. */
1373         if (nd->nd_repstat && !error)
1374                 error = nd->nd_repstat;
1375         return (error);
1376 }
1377
1378 /*
1379  * nfs lookup rpc
1380  */
1381 int
1382 nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
1383     NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
1384     struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff)
1385 {
1386         u_int32_t *tl;
1387         struct nfsrv_descript nfsd, *nd = &nfsd;
1388         struct nfsmount *nmp;
1389         struct nfsnode *np;
1390         struct nfsfh *nfhp;
1391         nfsattrbit_t attrbits;
1392         int error = 0, lookupp = 0;
1393
1394         *attrflagp = 0;
1395         *dattrflagp = 0;
1396         if (vnode_vtype(dvp) != VDIR)
1397                 return (ENOTDIR);
1398         nmp = VFSTONFS(dvp->v_mount);
1399         if (len > NFS_MAXNAMLEN)
1400                 return (ENAMETOOLONG);
1401         if (NFSHASNFSV4(nmp) && len == 1 &&
1402                 name[0] == '.') {
1403                 /*
1404                  * Just return the current dir's fh.
1405                  */
1406                 np = VTONFS(dvp);
1407                 nfhp = malloc(sizeof (struct nfsfh) +
1408                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1409                 nfhp->nfh_len = np->n_fhp->nfh_len;
1410                 NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1411                 *nfhpp = nfhp;
1412                 return (0);
1413         }
1414         if (NFSHASNFSV4(nmp) && len == 2 &&
1415                 name[0] == '.' && name[1] == '.') {
1416                 lookupp = 1;
1417                 NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp);
1418         } else {
1419                 NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp);
1420                 (void) nfsm_strtom(nd, name, len);
1421         }
1422         if (nd->nd_flag & ND_NFSV4) {
1423                 NFSGETATTR_ATTRBIT(&attrbits);
1424                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1425                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
1426                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1427                 (void) nfsrv_putattrbit(nd, &attrbits);
1428         }
1429         error = nfscl_request(nd, dvp, p, cred, stuff);
1430         if (error)
1431                 return (error);
1432         if (nd->nd_repstat) {
1433                 /*
1434                  * When an NFSv4 Lookupp returns ENOENT, it means that
1435                  * the lookup is at the root of an fs, so return this dir.
1436                  */
1437                 if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
1438                     np = VTONFS(dvp);
1439                     nfhp = malloc(sizeof (struct nfsfh) +
1440                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1441                     nfhp->nfh_len = np->n_fhp->nfh_len;
1442                     NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1443                     *nfhpp = nfhp;
1444                     m_freem(nd->nd_mrep);
1445                     return (0);
1446                 }
1447                 if (nd->nd_flag & ND_NFSV3)
1448                     error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1449                 else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1450                     ND_NFSV4) {
1451                         /* Load the directory attributes. */
1452                         error = nfsm_loadattr(nd, dnap);
1453                         if (error == 0)
1454                                 *dattrflagp = 1;
1455                 }
1456                 goto nfsmout;
1457         }
1458         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
1459                 /* Load the directory attributes. */
1460                 error = nfsm_loadattr(nd, dnap);
1461                 if (error != 0)
1462                         goto nfsmout;
1463                 *dattrflagp = 1;
1464                 /* Skip over the Lookup and GetFH operation status values. */
1465                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1466         }
1467         error = nfsm_getfh(nd, nfhpp);
1468         if (error)
1469                 goto nfsmout;
1470
1471         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1472         if ((nd->nd_flag & ND_NFSV3) && !error)
1473                 error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1474 nfsmout:
1475         m_freem(nd->nd_mrep);
1476         if (!error && nd->nd_repstat)
1477                 error = nd->nd_repstat;
1478         return (error);
1479 }
1480
1481 /*
1482  * Do a readlink rpc.
1483  */
1484 int
1485 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1486     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1487 {
1488         u_int32_t *tl;
1489         struct nfsrv_descript nfsd, *nd = &nfsd;
1490         struct nfsnode *np = VTONFS(vp);
1491         nfsattrbit_t attrbits;
1492         int error, len, cangetattr = 1;
1493
1494         *attrflagp = 0;
1495         NFSCL_REQSTART(nd, NFSPROC_READLINK, vp);
1496         if (nd->nd_flag & ND_NFSV4) {
1497                 /*
1498                  * And do a Getattr op.
1499                  */
1500                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1501                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1502                 NFSGETATTR_ATTRBIT(&attrbits);
1503                 (void) nfsrv_putattrbit(nd, &attrbits);
1504         }
1505         error = nfscl_request(nd, vp, p, cred, stuff);
1506         if (error)
1507                 return (error);
1508         if (nd->nd_flag & ND_NFSV3)
1509                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1510         if (!nd->nd_repstat && !error) {
1511                 NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1512                 /*
1513                  * This seems weird to me, but must have been added to
1514                  * FreeBSD for some reason. The only thing I can think of
1515                  * is that there was/is some server that replies with
1516                  * more link data than it should?
1517                  */
1518                 if (len == NFS_MAXPATHLEN) {
1519                         NFSLOCKNODE(np);
1520                         if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1521                                 len = np->n_size;
1522                                 cangetattr = 0;
1523                         }
1524                         NFSUNLOCKNODE(np);
1525                 }
1526                 error = nfsm_mbufuio(nd, uiop, len);
1527                 if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1528                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1529         }
1530         if (nd->nd_repstat && !error)
1531                 error = nd->nd_repstat;
1532 nfsmout:
1533         m_freem(nd->nd_mrep);
1534         return (error);
1535 }
1536
1537 /*
1538  * Read operation.
1539  */
1540 int
1541 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1542     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1543 {
1544         int error, expireret = 0, retrycnt;
1545         u_int32_t clidrev = 0;
1546         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1547         struct nfsnode *np = VTONFS(vp);
1548         struct ucred *newcred;
1549         struct nfsfh *nfhp = NULL;
1550         nfsv4stateid_t stateid;
1551         void *lckp;
1552
1553         if (nmp->nm_clp != NULL)
1554                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1555         newcred = cred;
1556         if (NFSHASNFSV4(nmp)) {
1557                 nfhp = np->n_fhp;
1558                 newcred = NFSNEWCRED(cred);
1559         }
1560         retrycnt = 0;
1561         do {
1562                 lckp = NULL;
1563                 if (NFSHASNFSV4(nmp))
1564                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1565                             NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1566                             &lckp);
1567                 error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1568                     attrflagp, stuff);
1569                 if (error == NFSERR_OPENMODE) {
1570                         NFSLOCKMNT(nmp);
1571                         nmp->nm_state |= NFSSTA_OPENMODE;
1572                         NFSUNLOCKMNT(nmp);
1573                 }
1574                 if (error == NFSERR_STALESTATEID)
1575                         nfscl_initiate_recovery(nmp->nm_clp);
1576                 if (lckp != NULL)
1577                         nfscl_lockderef(lckp);
1578                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1579                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1580                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1581                         (void) nfs_catnap(PZERO, error, "nfs_read");
1582                 } else if ((error == NFSERR_EXPIRED ||
1583                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1584                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1585                 }
1586                 retrycnt++;
1587         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1588             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1589             error == NFSERR_BADSESSION ||
1590             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1591             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1592              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1593             (error == NFSERR_OPENMODE && retrycnt < 4));
1594         if (error && retrycnt >= 4)
1595                 error = EIO;
1596         if (NFSHASNFSV4(nmp))
1597                 NFSFREECRED(newcred);
1598         return (error);
1599 }
1600
1601 /*
1602  * The actual read RPC.
1603  */
1604 static int
1605 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1606     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1607     int *attrflagp, void *stuff)
1608 {
1609         u_int32_t *tl;
1610         int error = 0, len, retlen, tsiz, eof = 0;
1611         struct nfsrv_descript nfsd;
1612         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1613         struct nfsrv_descript *nd = &nfsd;
1614         int rsize;
1615         off_t tmp_off;
1616
1617         *attrflagp = 0;
1618         tsiz = uiop->uio_resid;
1619         tmp_off = uiop->uio_offset + tsiz;
1620         NFSLOCKMNT(nmp);
1621         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1622                 NFSUNLOCKMNT(nmp);
1623                 return (EFBIG);
1624         }
1625         rsize = nmp->nm_rsize;
1626         NFSUNLOCKMNT(nmp);
1627         nd->nd_mrep = NULL;
1628         while (tsiz > 0) {
1629                 *attrflagp = 0;
1630                 len = (tsiz > rsize) ? rsize : tsiz;
1631                 NFSCL_REQSTART(nd, NFSPROC_READ, vp);
1632                 if (nd->nd_flag & ND_NFSV4)
1633                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1634                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1635                 if (nd->nd_flag & ND_NFSV2) {
1636                         *tl++ = txdr_unsigned(uiop->uio_offset);
1637                         *tl++ = txdr_unsigned(len);
1638                         *tl = 0;
1639                 } else {
1640                         txdr_hyper(uiop->uio_offset, tl);
1641                         *(tl + 2) = txdr_unsigned(len);
1642                 }
1643                 /*
1644                  * Since I can't do a Getattr for NFSv4 for Write, there
1645                  * doesn't seem any point in doing one here, either.
1646                  * (See the comment in nfsrpc_writerpc() for more info.)
1647                  */
1648                 error = nfscl_request(nd, vp, p, cred, stuff);
1649                 if (error)
1650                         return (error);
1651                 if (nd->nd_flag & ND_NFSV3) {
1652                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1653                 } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1654                         error = nfsm_loadattr(nd, nap);
1655                         if (!error)
1656                                 *attrflagp = 1;
1657                 }
1658                 if (nd->nd_repstat || error) {
1659                         if (!error)
1660                                 error = nd->nd_repstat;
1661                         goto nfsmout;
1662                 }
1663                 if (nd->nd_flag & ND_NFSV3) {
1664                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1665                         eof = fxdr_unsigned(int, *(tl + 1));
1666                 } else if (nd->nd_flag & ND_NFSV4) {
1667                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1668                         eof = fxdr_unsigned(int, *tl);
1669                 }
1670                 NFSM_STRSIZ(retlen, len);
1671                 error = nfsm_mbufuio(nd, uiop, retlen);
1672                 if (error)
1673                         goto nfsmout;
1674                 m_freem(nd->nd_mrep);
1675                 nd->nd_mrep = NULL;
1676                 tsiz -= retlen;
1677                 if (!(nd->nd_flag & ND_NFSV2)) {
1678                         if (eof || retlen == 0)
1679                                 tsiz = 0;
1680                 } else if (retlen < len)
1681                         tsiz = 0;
1682         }
1683         return (0);
1684 nfsmout:
1685         if (nd->nd_mrep != NULL)
1686                 m_freem(nd->nd_mrep);
1687         return (error);
1688 }
1689
1690 /*
1691  * nfs write operation
1692  * When called_from_strategy != 0, it should return EIO for an error that
1693  * indicates recovery is in progress, so that the buffer will be left
1694  * dirty and be written back to the server later. If it loops around,
1695  * the recovery thread could get stuck waiting for the buffer and recovery
1696  * will then deadlock.
1697  */
1698 int
1699 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1700     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1701     void *stuff, int called_from_strategy)
1702 {
1703         int error, expireret = 0, retrycnt, nostateid;
1704         u_int32_t clidrev = 0;
1705         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1706         struct nfsnode *np = VTONFS(vp);
1707         struct ucred *newcred;
1708         struct nfsfh *nfhp = NULL;
1709         nfsv4stateid_t stateid;
1710         void *lckp;
1711
1712         *must_commit = 0;
1713         if (nmp->nm_clp != NULL)
1714                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1715         newcred = cred;
1716         if (NFSHASNFSV4(nmp)) {
1717                 newcred = NFSNEWCRED(cred);
1718                 nfhp = np->n_fhp;
1719         }
1720         retrycnt = 0;
1721         do {
1722                 lckp = NULL;
1723                 nostateid = 0;
1724                 if (NFSHASNFSV4(nmp)) {
1725                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1726                             NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1727                             &lckp);
1728                         if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1729                             stateid.other[2] == 0) {
1730                                 nostateid = 1;
1731                                 NFSCL_DEBUG(1, "stateid0 in write\n");
1732                         }
1733                 }
1734
1735                 /*
1736                  * If there is no stateid for NFSv4, it means this is an
1737                  * extraneous write after close. Basically a poorly
1738                  * implemented buffer cache. Just don't do the write.
1739                  */
1740                 if (nostateid)
1741                         error = 0;
1742                 else
1743                         error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1744                             newcred, &stateid, p, nap, attrflagp, stuff);
1745                 if (error == NFSERR_STALESTATEID)
1746                         nfscl_initiate_recovery(nmp->nm_clp);
1747                 if (lckp != NULL)
1748                         nfscl_lockderef(lckp);
1749                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1750                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1751                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1752                         (void) nfs_catnap(PZERO, error, "nfs_write");
1753                 } else if ((error == NFSERR_EXPIRED ||
1754                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1755                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1756                 }
1757                 retrycnt++;
1758         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
1759             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1760               error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
1761             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1762             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1763              expireret == 0 && clidrev != 0 && retrycnt < 4));
1764         if (error != 0 && (retrycnt >= 4 ||
1765             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1766               error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
1767                 error = EIO;
1768         if (NFSHASNFSV4(nmp))
1769                 NFSFREECRED(newcred);
1770         return (error);
1771 }
1772
1773 /*
1774  * The actual write RPC.
1775  */
1776 static int
1777 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
1778     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
1779     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1780 {
1781         u_int32_t *tl;
1782         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1783         struct nfsnode *np = VTONFS(vp);
1784         int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC;
1785         int wccflag = 0, wsize;
1786         int32_t backup;
1787         struct nfsrv_descript nfsd;
1788         struct nfsrv_descript *nd = &nfsd;
1789         nfsattrbit_t attrbits;
1790         off_t tmp_off;
1791
1792         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
1793         *attrflagp = 0;
1794         tsiz = uiop->uio_resid;
1795         tmp_off = uiop->uio_offset + tsiz;
1796         NFSLOCKMNT(nmp);
1797         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1798                 NFSUNLOCKMNT(nmp);
1799                 return (EFBIG);
1800         }
1801         wsize = nmp->nm_wsize;
1802         NFSUNLOCKMNT(nmp);
1803         nd->nd_mrep = NULL;     /* NFSv2 sometimes does a write with */
1804         nd->nd_repstat = 0;     /* uio_resid == 0, so the while is not done */
1805         while (tsiz > 0) {
1806                 *attrflagp = 0;
1807                 len = (tsiz > wsize) ? wsize : tsiz;
1808                 NFSCL_REQSTART(nd, NFSPROC_WRITE, vp);
1809                 if (nd->nd_flag & ND_NFSV4) {
1810                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1811                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
1812                         txdr_hyper(uiop->uio_offset, tl);
1813                         tl += 2;
1814                         *tl++ = txdr_unsigned(*iomode);
1815                         *tl = txdr_unsigned(len);
1816                 } else if (nd->nd_flag & ND_NFSV3) {
1817                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
1818                         txdr_hyper(uiop->uio_offset, tl);
1819                         tl += 2;
1820                         *tl++ = txdr_unsigned(len);
1821                         *tl++ = txdr_unsigned(*iomode);
1822                         *tl = txdr_unsigned(len);
1823                 } else {
1824                         u_int32_t x;
1825
1826                         NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1827                         /*
1828                          * Not sure why someone changed this, since the
1829                          * RFC clearly states that "beginoffset" and
1830                          * "totalcount" are ignored, but it wouldn't
1831                          * surprise me if there's a busted server out there.
1832                          */
1833                         /* Set both "begin" and "current" to non-garbage. */
1834                         x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1835                         *tl++ = x;      /* "begin offset" */
1836                         *tl++ = x;      /* "current offset" */
1837                         x = txdr_unsigned(len);
1838                         *tl++ = x;      /* total to this offset */
1839                         *tl = x;        /* size of this write */
1840                 }
1841                 nfsm_uiombuf(nd, uiop, len);
1842                 /*
1843                  * Although it is tempting to do a normal Getattr Op in the
1844                  * NFSv4 compound, the result can be a nearly hung client
1845                  * system if the Getattr asks for Owner and/or OwnerGroup.
1846                  * It occurs when the client can't map either the Owner or
1847                  * Owner_group name in the Getattr reply to a uid/gid. When
1848                  * there is a cache miss, the kernel does an upcall to the
1849                  * nfsuserd. Then, it can try and read the local /etc/passwd
1850                  * or /etc/group file. It can then block in getnewbuf(),
1851                  * waiting for dirty writes to be pushed to the NFS server.
1852                  * The only reason this doesn't result in a complete
1853                  * deadlock, is that the upcall times out and allows
1854                  * the write to complete. However, progress is so slow
1855                  * that it might just as well be deadlocked.
1856                  * As such, we get the rest of the attributes, but not
1857                  * Owner or Owner_group.
1858                  * nb: nfscl_loadattrcache() needs to be told that these
1859                  *     partial attributes from a write rpc are being
1860                  *     passed in, via a argument flag.
1861                  */
1862                 if (nd->nd_flag & ND_NFSV4) {
1863                         NFSWRITEGETATTR_ATTRBIT(&attrbits);
1864                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1865                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
1866                         (void) nfsrv_putattrbit(nd, &attrbits);
1867                 }
1868                 error = nfscl_request(nd, vp, p, cred, stuff);
1869                 if (error)
1870                         return (error);
1871                 if (nd->nd_repstat) {
1872                         /*
1873                          * In case the rpc gets retried, roll
1874                          * the uio fileds changed by nfsm_uiombuf()
1875                          * back.
1876                          */
1877                         uiop->uio_offset -= len;
1878                         uiop->uio_resid += len;
1879                         uiop->uio_iov->iov_base =
1880                             (char *)uiop->uio_iov->iov_base - len;
1881                         uiop->uio_iov->iov_len += len;
1882                 }
1883                 if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1884                         error = nfscl_wcc_data(nd, vp, nap, attrflagp,
1885                             &wccflag, stuff);
1886                         if (error)
1887                                 goto nfsmout;
1888                 }
1889                 if (!nd->nd_repstat) {
1890                         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1891                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
1892                                         + NFSX_VERF);
1893                                 rlen = fxdr_unsigned(int, *tl++);
1894                                 if (rlen == 0) {
1895                                         error = NFSERR_IO;
1896                                         goto nfsmout;
1897                                 } else if (rlen < len) {
1898                                         backup = len - rlen;
1899                                         uiop->uio_iov->iov_base =
1900                                             (char *)uiop->uio_iov->iov_base -
1901                                             backup;
1902                                         uiop->uio_iov->iov_len += backup;
1903                                         uiop->uio_offset -= backup;
1904                                         uiop->uio_resid += backup;
1905                                         len = rlen;
1906                                 }
1907                                 commit = fxdr_unsigned(int, *tl++);
1908
1909                                 /*
1910                                  * Return the lowest commitment level
1911                                  * obtained by any of the RPCs.
1912                                  */
1913                                 if (committed == NFSWRITE_FILESYNC)
1914                                         committed = commit;
1915                                 else if (committed == NFSWRITE_DATASYNC &&
1916                                         commit == NFSWRITE_UNSTABLE)
1917                                         committed = commit;
1918                                 NFSLOCKMNT(nmp);
1919                                 if (!NFSHASWRITEVERF(nmp)) {
1920                                         NFSBCOPY((caddr_t)tl,
1921                                             (caddr_t)&nmp->nm_verf[0],
1922                                             NFSX_VERF);
1923                                         NFSSETWRITEVERF(nmp);
1924                                 } else if (NFSBCMP(tl, nmp->nm_verf,
1925                                     NFSX_VERF)) {
1926                                         *must_commit = 1;
1927                                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
1928                                 }
1929                                 NFSUNLOCKMNT(nmp);
1930                         }
1931                         if (nd->nd_flag & ND_NFSV4)
1932                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1933                         if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
1934                                 error = nfsm_loadattr(nd, nap);
1935                                 if (!error)
1936                                         *attrflagp = NFS_LATTR_NOSHRINK;
1937                         }
1938                 } else {
1939                         error = nd->nd_repstat;
1940                 }
1941                 if (error)
1942                         goto nfsmout;
1943                 NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
1944                 m_freem(nd->nd_mrep);
1945                 nd->nd_mrep = NULL;
1946                 tsiz -= len;
1947         }
1948 nfsmout:
1949         if (nd->nd_mrep != NULL)
1950                 m_freem(nd->nd_mrep);
1951         *iomode = committed;
1952         if (nd->nd_repstat && !error)
1953                 error = nd->nd_repstat;
1954         return (error);
1955 }
1956
1957 /*
1958  * nfs mknod rpc
1959  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1960  * mode set to specify the file type and the size field for rdev.
1961  */
1962 int
1963 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1964     u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
1965     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1966     int *attrflagp, int *dattrflagp, void *dstuff)
1967 {
1968         u_int32_t *tl;
1969         int error = 0;
1970         struct nfsrv_descript nfsd, *nd = &nfsd;
1971         nfsattrbit_t attrbits;
1972
1973         *nfhpp = NULL;
1974         *attrflagp = 0;
1975         *dattrflagp = 0;
1976         if (namelen > NFS_MAXNAMLEN)
1977                 return (ENAMETOOLONG);
1978         NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp);
1979         if (nd->nd_flag & ND_NFSV4) {
1980                 if (vtyp == VBLK || vtyp == VCHR) {
1981                         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1982                         *tl++ = vtonfsv34_type(vtyp);
1983                         *tl++ = txdr_unsigned(NFSMAJOR(rdev));
1984                         *tl = txdr_unsigned(NFSMINOR(rdev));
1985                 } else {
1986                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1987                         *tl = vtonfsv34_type(vtyp);
1988                 }
1989         }
1990         (void) nfsm_strtom(nd, name, namelen);
1991         if (nd->nd_flag & ND_NFSV3) {
1992                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1993                 *tl = vtonfsv34_type(vtyp);
1994         }
1995         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1996                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
1997         if ((nd->nd_flag & ND_NFSV3) &&
1998             (vtyp == VCHR || vtyp == VBLK)) {
1999                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2000                 *tl++ = txdr_unsigned(NFSMAJOR(rdev));
2001                 *tl = txdr_unsigned(NFSMINOR(rdev));
2002         }
2003         if (nd->nd_flag & ND_NFSV4) {
2004                 NFSGETATTR_ATTRBIT(&attrbits);
2005                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2006                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2007                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2008                 (void) nfsrv_putattrbit(nd, &attrbits);
2009         }
2010         if (nd->nd_flag & ND_NFSV2)
2011                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
2012         error = nfscl_request(nd, dvp, p, cred, dstuff);
2013         if (error)
2014                 return (error);
2015         if (nd->nd_flag & ND_NFSV4)
2016                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2017         if (!nd->nd_repstat) {
2018                 if (nd->nd_flag & ND_NFSV4) {
2019                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2020                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2021                         if (error)
2022                                 goto nfsmout;
2023                 }
2024                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2025                 if (error)
2026                         goto nfsmout;
2027         }
2028         if (nd->nd_flag & ND_NFSV3)
2029                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2030         if (!error && nd->nd_repstat)
2031                 error = nd->nd_repstat;
2032 nfsmout:
2033         m_freem(nd->nd_mrep);
2034         return (error);
2035 }
2036
2037 /*
2038  * nfs file create call
2039  * Mostly just call the appropriate routine. (I separated out v4, so that
2040  * error recovery wouldn't be as difficult.)
2041  */
2042 int
2043 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2044     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2045     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2046     int *attrflagp, int *dattrflagp, void *dstuff)
2047 {
2048         int error = 0, newone, expireret = 0, retrycnt, unlocked;
2049         struct nfsclowner *owp;
2050         struct nfscldeleg *dp;
2051         struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2052         u_int32_t clidrev;
2053
2054         if (NFSHASNFSV4(nmp)) {
2055             retrycnt = 0;
2056             do {
2057                 dp = NULL;
2058                 error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
2059                     NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
2060                     NULL, 1);
2061                 if (error)
2062                         return (error);
2063                 if (nmp->nm_clp != NULL)
2064                         clidrev = nmp->nm_clp->nfsc_clientidrev;
2065                 else
2066                         clidrev = 0;
2067                 if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
2068                     nfs_numnfscbd == 0 || retrycnt > 0)
2069                         error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
2070                           fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2071                           attrflagp, dattrflagp, dstuff, &unlocked);
2072                 else
2073                         error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
2074                           cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2075                           attrflagp, dattrflagp, dstuff, &unlocked);
2076                 /*
2077                  * There is no need to invalidate cached attributes here,
2078                  * since new post-delegation issue attributes are always
2079                  * returned by nfsrpc_createv4() and these will update the
2080                  * attribute cache.
2081                  */
2082                 if (dp != NULL)
2083                         (void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
2084                             (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
2085                 nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
2086                 if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2087                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2088                     error == NFSERR_BADSESSION) {
2089                         (void) nfs_catnap(PZERO, error, "nfs_open");
2090                 } else if ((error == NFSERR_EXPIRED ||
2091                     error == NFSERR_BADSTATEID) && clidrev != 0) {
2092                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2093                         retrycnt++;
2094                 }
2095             } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2096                 error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2097                 error == NFSERR_BADSESSION ||
2098                 ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2099                  expireret == 0 && clidrev != 0 && retrycnt < 4));
2100             if (error && retrycnt >= 4)
2101                     error = EIO;
2102         } else {
2103                 error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
2104                     fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
2105                     dstuff);
2106         }
2107         return (error);
2108 }
2109
2110 /*
2111  * The create rpc for v2 and 3.
2112  */
2113 static int
2114 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2115     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2116     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2117     int *attrflagp, int *dattrflagp, void *dstuff)
2118 {
2119         u_int32_t *tl;
2120         int error = 0;
2121         struct nfsrv_descript nfsd, *nd = &nfsd;
2122
2123         *nfhpp = NULL;
2124         *attrflagp = 0;
2125         *dattrflagp = 0;
2126         if (namelen > NFS_MAXNAMLEN)
2127                 return (ENAMETOOLONG);
2128         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2129         (void) nfsm_strtom(nd, name, namelen);
2130         if (nd->nd_flag & ND_NFSV3) {
2131                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2132                 if (fmode & O_EXCL) {
2133                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2134                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2135                         *tl++ = cverf.lval[0];
2136                         *tl = cverf.lval[1];
2137                 } else {
2138                         *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2139                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
2140                 }
2141         } else {
2142                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2143         }
2144         error = nfscl_request(nd, dvp, p, cred, dstuff);
2145         if (error)
2146                 return (error);
2147         if (nd->nd_repstat == 0) {
2148                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2149                 if (error)
2150                         goto nfsmout;
2151         }
2152         if (nd->nd_flag & ND_NFSV3)
2153                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2154         if (nd->nd_repstat != 0 && error == 0)
2155                 error = nd->nd_repstat;
2156 nfsmout:
2157         m_freem(nd->nd_mrep);
2158         return (error);
2159 }
2160
2161 static int
2162 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2163     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2164     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2165     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2166     int *dattrflagp, void *dstuff, int *unlockedp)
2167 {
2168         u_int32_t *tl;
2169         int error = 0, deleg, newone, ret, acesize, limitby;
2170         struct nfsrv_descript nfsd, *nd = &nfsd;
2171         struct nfsclopen *op;
2172         struct nfscldeleg *dp = NULL;
2173         struct nfsnode *np;
2174         struct nfsfh *nfhp;
2175         nfsattrbit_t attrbits;
2176         nfsv4stateid_t stateid;
2177         u_int32_t rflags;
2178         struct nfsmount *nmp;
2179         struct nfsclsession *tsep;
2180
2181         nmp = VFSTONFS(dvp->v_mount);
2182         np = VTONFS(dvp);
2183         *unlockedp = 0;
2184         *nfhpp = NULL;
2185         *dpp = NULL;
2186         *attrflagp = 0;
2187         *dattrflagp = 0;
2188         if (namelen > NFS_MAXNAMLEN)
2189                 return (ENAMETOOLONG);
2190         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2191         /*
2192          * For V4, this is actually an Open op.
2193          */
2194         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2195         *tl++ = txdr_unsigned(owp->nfsow_seqid);
2196         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2197             NFSV4OPEN_ACCESSREAD);
2198         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2199         tsep = nfsmnt_mdssession(nmp);
2200         *tl++ = tsep->nfsess_clientid.lval[0];
2201         *tl = tsep->nfsess_clientid.lval[1];
2202         (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
2203         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2204         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2205         if (fmode & O_EXCL) {
2206                 if (NFSHASNFSV4N(nmp)) {
2207                         if (NFSHASSESSPERSIST(nmp)) {
2208                                 /* Use GUARDED for persistent sessions. */
2209                                 *tl = txdr_unsigned(NFSCREATE_GUARDED);
2210                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2211                         } else {
2212                                 /* Otherwise, use EXCLUSIVE4_1. */
2213                                 *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2214                                 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2215                                 *tl++ = cverf.lval[0];
2216                                 *tl = cverf.lval[1];
2217                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2218                         }
2219                 } else {
2220                         /* NFSv4.0 */
2221                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2222                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2223                         *tl++ = cverf.lval[0];
2224                         *tl = cverf.lval[1];
2225                 }
2226         } else {
2227                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2228                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2229         }
2230         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2231         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2232         (void) nfsm_strtom(nd, name, namelen);
2233         /* Get the new file's handle and attributes. */
2234         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2235         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2236         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2237         NFSGETATTR_ATTRBIT(&attrbits);
2238         (void) nfsrv_putattrbit(nd, &attrbits);
2239         /* Get the directory's post-op attributes. */
2240         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2241         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2242         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2243         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2244         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2245         (void) nfsrv_putattrbit(nd, &attrbits);
2246         error = nfscl_request(nd, dvp, p, cred, dstuff);
2247         if (error)
2248                 return (error);
2249         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2250         if (nd->nd_repstat == 0) {
2251                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2252                     6 * NFSX_UNSIGNED);
2253                 stateid.seqid = *tl++;
2254                 stateid.other[0] = *tl++;
2255                 stateid.other[1] = *tl++;
2256                 stateid.other[2] = *tl;
2257                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
2258                 (void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2259                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2260                 deleg = fxdr_unsigned(int, *tl);
2261                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
2262                     deleg == NFSV4OPEN_DELEGATEWRITE) {
2263                         if (!(owp->nfsow_clp->nfsc_flags &
2264                               NFSCLFLAGS_FIRSTDELEG))
2265                                 owp->nfsow_clp->nfsc_flags |=
2266                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
2267                         dp = malloc(
2268                             sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2269                             M_NFSCLDELEG, M_WAITOK);
2270                         LIST_INIT(&dp->nfsdl_owner);
2271                         LIST_INIT(&dp->nfsdl_lock);
2272                         dp->nfsdl_clp = owp->nfsow_clp;
2273                         newnfs_copyincred(cred, &dp->nfsdl_cred);
2274                         nfscl_lockinit(&dp->nfsdl_rwlock);
2275                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2276                             NFSX_UNSIGNED);
2277                         dp->nfsdl_stateid.seqid = *tl++;
2278                         dp->nfsdl_stateid.other[0] = *tl++;
2279                         dp->nfsdl_stateid.other[1] = *tl++;
2280                         dp->nfsdl_stateid.other[2] = *tl++;
2281                         ret = fxdr_unsigned(int, *tl);
2282                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2283                                 dp->nfsdl_flags = NFSCLDL_WRITE;
2284                                 /*
2285                                  * Indicates how much the file can grow.
2286                                  */
2287                                 NFSM_DISSECT(tl, u_int32_t *,
2288                                     3 * NFSX_UNSIGNED);
2289                                 limitby = fxdr_unsigned(int, *tl++);
2290                                 switch (limitby) {
2291                                 case NFSV4OPEN_LIMITSIZE:
2292                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
2293                                         break;
2294                                 case NFSV4OPEN_LIMITBLOCKS:
2295                                         dp->nfsdl_sizelimit =
2296                                             fxdr_unsigned(u_int64_t, *tl++);
2297                                         dp->nfsdl_sizelimit *=
2298                                             fxdr_unsigned(u_int64_t, *tl);
2299                                         break;
2300                                 default:
2301                                         error = NFSERR_BADXDR;
2302                                         goto nfsmout;
2303                                 }
2304                         } else {
2305                                 dp->nfsdl_flags = NFSCLDL_READ;
2306                         }
2307                         if (ret)
2308                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
2309                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
2310                             &acesize, p);
2311                         if (error)
2312                                 goto nfsmout;
2313                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
2314                         error = NFSERR_BADXDR;
2315                         goto nfsmout;
2316                 }
2317                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2318                 if (error)
2319                         goto nfsmout;
2320                 /* Get rid of the PutFH and Getattr status values. */
2321                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2322                 /* Load the directory attributes. */
2323                 error = nfsm_loadattr(nd, dnap);
2324                 if (error)
2325                         goto nfsmout;
2326                 *dattrflagp = 1;
2327                 if (dp != NULL && *attrflagp) {
2328                         dp->nfsdl_change = nnap->na_filerev;
2329                         dp->nfsdl_modtime = nnap->na_mtime;
2330                         dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2331                 }
2332                 /*
2333                  * We can now complete the Open state.
2334                  */
2335                 nfhp = *nfhpp;
2336                 if (dp != NULL) {
2337                         dp->nfsdl_fhlen = nfhp->nfh_len;
2338                         NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2339                 }
2340                 /*
2341                  * Get an Open structure that will be
2342                  * attached to the OpenOwner, acquired already.
2343                  */
2344                 error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
2345                     (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2346                     cred, p, NULL, &op, &newone, NULL, 0);
2347                 if (error)
2348                         goto nfsmout;
2349                 op->nfso_stateid = stateid;
2350                 newnfs_copyincred(cred, &op->nfso_cred);
2351                 if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2352                     do {
2353                         ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2354                             nfhp->nfh_len, op, cred, p);
2355                         if (ret == NFSERR_DELAY)
2356                             (void) nfs_catnap(PZERO, ret, "nfs_create");
2357                     } while (ret == NFSERR_DELAY);
2358                     error = ret;
2359                 }
2360
2361                 /*
2362                  * If the server is handing out delegations, but we didn't
2363                  * get one because an OpenConfirm was required, try the
2364                  * Open again, to get a delegation. This is a harmless no-op,
2365                  * from a server's point of view.
2366                  */
2367                 if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2368                     (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2369                     !error && dp == NULL) {
2370                     do {
2371                         ret = nfsrpc_openrpc(VFSTONFS(dvp->v_mount), dvp,
2372                             np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2373                             nfhp->nfh_fh, nfhp->nfh_len,
2374                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2375                             name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2376                         if (ret == NFSERR_DELAY)
2377                             (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2378                     } while (ret == NFSERR_DELAY);
2379                     if (ret) {
2380                         if (dp != NULL) {
2381                                 free(dp, M_NFSCLDELEG);
2382                                 dp = NULL;
2383                         }
2384                         if (ret == NFSERR_STALECLIENTID ||
2385                             ret == NFSERR_STALEDONTRECOVER ||
2386                             ret == NFSERR_BADSESSION)
2387                                 error = ret;
2388                     }
2389                 }
2390                 nfscl_openrelease(nmp, op, error, newone);
2391                 *unlockedp = 1;
2392         }
2393         if (nd->nd_repstat != 0 && error == 0)
2394                 error = nd->nd_repstat;
2395         if (error == NFSERR_STALECLIENTID)
2396                 nfscl_initiate_recovery(owp->nfsow_clp);
2397 nfsmout:
2398         if (!error)
2399                 *dpp = dp;
2400         else if (dp != NULL)
2401                 free(dp, M_NFSCLDELEG);
2402         m_freem(nd->nd_mrep);
2403         return (error);
2404 }
2405
2406 /*
2407  * Nfs remove rpc
2408  */
2409 int
2410 nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2411     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp,
2412     void *dstuff)
2413 {
2414         u_int32_t *tl;
2415         struct nfsrv_descript nfsd, *nd = &nfsd;
2416         struct nfsnode *np;
2417         struct nfsmount *nmp;
2418         nfsv4stateid_t dstateid;
2419         int error, ret = 0, i;
2420
2421         *dattrflagp = 0;
2422         if (namelen > NFS_MAXNAMLEN)
2423                 return (ENAMETOOLONG);
2424         nmp = VFSTONFS(dvp->v_mount);
2425 tryagain:
2426         if (NFSHASNFSV4(nmp) && ret == 0) {
2427                 ret = nfscl_removedeleg(vp, p, &dstateid);
2428                 if (ret == 1) {
2429                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp);
2430                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2431                             NFSX_UNSIGNED);
2432                         if (NFSHASNFSV4N(nmp))
2433                                 *tl++ = 0;
2434                         else
2435                                 *tl++ = dstateid.seqid;
2436                         *tl++ = dstateid.other[0];
2437                         *tl++ = dstateid.other[1];
2438                         *tl++ = dstateid.other[2];
2439                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2440                         np = VTONFS(dvp);
2441                         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2442                             np->n_fhp->nfh_len, 0);
2443                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2444                         *tl = txdr_unsigned(NFSV4OP_REMOVE);
2445                 }
2446         } else {
2447                 ret = 0;
2448         }
2449         if (ret == 0)
2450                 NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp);
2451         (void) nfsm_strtom(nd, name, namelen);
2452         error = nfscl_request(nd, dvp, p, cred, dstuff);
2453         if (error)
2454                 return (error);
2455         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2456                 /* For NFSv4, parse out any Delereturn replies. */
2457                 if (ret > 0 && nd->nd_repstat != 0 &&
2458                     (nd->nd_flag & ND_NOMOREDATA)) {
2459                         /*
2460                          * If the Delegreturn failed, try again without
2461                          * it. The server will Recall, as required.
2462                          */
2463                         m_freem(nd->nd_mrep);
2464                         goto tryagain;
2465                 }
2466                 for (i = 0; i < (ret * 2); i++) {
2467                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2468                             ND_NFSV4) {
2469                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2470                             if (*(tl + 1))
2471                                 nd->nd_flag |= ND_NOMOREDATA;
2472                         }
2473                 }
2474                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2475         }
2476         if (nd->nd_repstat && !error)
2477                 error = nd->nd_repstat;
2478 nfsmout:
2479         m_freem(nd->nd_mrep);
2480         return (error);
2481 }
2482
2483 /*
2484  * Do an nfs rename rpc.
2485  */
2486 int
2487 nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
2488     vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
2489     NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
2490     int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff)
2491 {
2492         u_int32_t *tl;
2493         struct nfsrv_descript nfsd, *nd = &nfsd;
2494         struct nfsmount *nmp;
2495         struct nfsnode *np;
2496         nfsattrbit_t attrbits;
2497         nfsv4stateid_t fdstateid, tdstateid;
2498         int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
2499
2500         *fattrflagp = 0;
2501         *tattrflagp = 0;
2502         nmp = VFSTONFS(fdvp->v_mount);
2503         if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
2504                 return (ENAMETOOLONG);
2505 tryagain:
2506         if (NFSHASNFSV4(nmp) && ret == 0) {
2507                 ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
2508                     &tdstateid, &gottd, p);
2509                 if (gotfd && gottd) {
2510                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp);
2511                 } else if (gotfd) {
2512                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp);
2513                 } else if (gottd) {
2514                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp);
2515                 }
2516                 if (gotfd) {
2517                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2518                         if (NFSHASNFSV4N(nmp))
2519                                 *tl++ = 0;
2520                         else
2521                                 *tl++ = fdstateid.seqid;
2522                         *tl++ = fdstateid.other[0];
2523                         *tl++ = fdstateid.other[1];
2524                         *tl = fdstateid.other[2];
2525                         if (gottd) {
2526                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2527                                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2528                                 np = VTONFS(tvp);
2529                                 (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2530                                     np->n_fhp->nfh_len, 0);
2531                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2532                                 *tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
2533                         }
2534                 }
2535                 if (gottd) {
2536                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2537                         if (NFSHASNFSV4N(nmp))
2538                                 *tl++ = 0;
2539                         else
2540                                 *tl++ = tdstateid.seqid;
2541                         *tl++ = tdstateid.other[0];
2542                         *tl++ = tdstateid.other[1];
2543                         *tl = tdstateid.other[2];
2544                 }
2545                 if (ret > 0) {
2546                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2547                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2548                         np = VTONFS(fdvp);
2549                         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2550                             np->n_fhp->nfh_len, 0);
2551                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2552                         *tl = txdr_unsigned(NFSV4OP_SAVEFH);
2553                 }
2554         } else {
2555                 ret = 0;
2556         }
2557         if (ret == 0)
2558                 NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp);
2559         if (nd->nd_flag & ND_NFSV4) {
2560                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2561                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2562                 NFSWCCATTR_ATTRBIT(&attrbits);
2563                 (void) nfsrv_putattrbit(nd, &attrbits);
2564                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2565                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2566                 (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2567                     VTONFS(tdvp)->n_fhp->nfh_len, 0);
2568                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2569                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2570                 (void) nfsrv_putattrbit(nd, &attrbits);
2571                 nd->nd_flag |= ND_V4WCCATTR;
2572                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2573                 *tl = txdr_unsigned(NFSV4OP_RENAME);
2574         }
2575         (void) nfsm_strtom(nd, fnameptr, fnamelen);
2576         if (!(nd->nd_flag & ND_NFSV4))
2577                 (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2578                         VTONFS(tdvp)->n_fhp->nfh_len, 0);
2579         (void) nfsm_strtom(nd, tnameptr, tnamelen);
2580         error = nfscl_request(nd, fdvp, p, cred, fstuff);
2581         if (error)
2582                 return (error);
2583         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2584                 /* For NFSv4, parse out any Delereturn replies. */
2585                 if (ret > 0 && nd->nd_repstat != 0 &&
2586                     (nd->nd_flag & ND_NOMOREDATA)) {
2587                         /*
2588                          * If the Delegreturn failed, try again without
2589                          * it. The server will Recall, as required.
2590                          */
2591                         m_freem(nd->nd_mrep);
2592                         goto tryagain;
2593                 }
2594                 for (i = 0; i < (ret * 2); i++) {
2595                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2596                             ND_NFSV4) {
2597                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2598                             if (*(tl + 1)) {
2599                                 if (i == 0 && ret > 1) {
2600                                     /*
2601                                      * If the Delegreturn failed, try again
2602                                      * without it. The server will Recall, as
2603                                      * required.
2604                                      * If ret > 1, the first iteration of this
2605                                      * loop is the second DelegReturn result.
2606                                      */
2607                                     m_freem(nd->nd_mrep);
2608                                     goto tryagain;
2609                                 } else {
2610                                     nd->nd_flag |= ND_NOMOREDATA;
2611                                 }
2612                             }
2613                         }
2614                 }
2615                 /* Now, the first wcc attribute reply. */
2616                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2617                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2618                         if (*(tl + 1))
2619                                 nd->nd_flag |= ND_NOMOREDATA;
2620                 }
2621                 error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL,
2622                     fstuff);
2623                 /* and the second wcc attribute reply. */
2624                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
2625                     !error) {
2626                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2627                         if (*(tl + 1))
2628                                 nd->nd_flag |= ND_NOMOREDATA;
2629                 }
2630                 if (!error)
2631                         error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
2632                             NULL, tstuff);
2633         }
2634         if (nd->nd_repstat && !error)
2635                 error = nd->nd_repstat;
2636 nfsmout:
2637         m_freem(nd->nd_mrep);
2638         return (error);
2639 }
2640
2641 /*
2642  * nfs hard link create rpc
2643  */
2644 int
2645 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
2646     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2647     struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff)
2648 {
2649         u_int32_t *tl;
2650         struct nfsrv_descript nfsd, *nd = &nfsd;
2651         nfsattrbit_t attrbits;
2652         int error = 0;
2653
2654         *attrflagp = 0;
2655         *dattrflagp = 0;
2656         if (namelen > NFS_MAXNAMLEN)
2657                 return (ENAMETOOLONG);
2658         NFSCL_REQSTART(nd, NFSPROC_LINK, vp);
2659         if (nd->nd_flag & ND_NFSV4) {
2660                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2661                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2662         }
2663         (void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh,
2664                 VTONFS(dvp)->n_fhp->nfh_len, 0);
2665         if (nd->nd_flag & ND_NFSV4) {
2666                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2667                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2668                 NFSWCCATTR_ATTRBIT(&attrbits);
2669                 (void) nfsrv_putattrbit(nd, &attrbits);
2670                 nd->nd_flag |= ND_V4WCCATTR;
2671                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2672                 *tl = txdr_unsigned(NFSV4OP_LINK);
2673         }
2674         (void) nfsm_strtom(nd, name, namelen);
2675         error = nfscl_request(nd, vp, p, cred, dstuff);
2676         if (error)
2677                 return (error);
2678         if (nd->nd_flag & ND_NFSV3) {
2679                 error = nfscl_postop_attr(nd, nap, attrflagp, dstuff);
2680                 if (!error)
2681                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2682                             NULL, dstuff);
2683         } else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2684                 /*
2685                  * First, parse out the PutFH and Getattr result.
2686                  */
2687                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2688                 if (!(*(tl + 1)))
2689                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2690                 if (*(tl + 1))
2691                         nd->nd_flag |= ND_NOMOREDATA;
2692                 /*
2693                  * Get the pre-op attributes.
2694                  */
2695                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2696         }
2697         if (nd->nd_repstat && !error)
2698                 error = nd->nd_repstat;
2699 nfsmout:
2700         m_freem(nd->nd_mrep);
2701         return (error);
2702 }
2703
2704 /*
2705  * Symbolic link create RPC: make "name" in dvp a link to "target".
2706  */
2707 int
2708 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, const char *target,
2709     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2710     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2711     int *dattrflagp, void *dstuff)
2712 {
2713         struct nfsrv_descript nfsd, *nd = &nfsd;
2714         struct nfsmount *nmp;
2715         u_int32_t *tl;
2716         int error = 0, slen;
2717
2718         *dattrflagp = 0;
2719         *attrflagp = 0;
2720         *nfhpp = NULL;
2721         nmp = VFSTONFS(dvp->v_mount);
2722         slen = strlen(target);
2723         if (namelen > NFS_MAXNAMLEN || slen > NFS_MAXPATHLEN)
2724                 return (ENAMETOOLONG);
2725         /* The order of name, target and attributes differs per version. */
2726         NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp);
2727         if ((nd->nd_flag & ND_NFSV4) != 0) {
2728                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2729                 *tl = txdr_unsigned(NFLNK);
2730                 (void) nfsm_strtom(nd, target, slen);
2731         }
2732         (void) nfsm_strtom(nd, name, namelen);
2733         if ((nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) != 0)
2734                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2735         if ((nd->nd_flag & ND_NFSV4) == 0)
2736                 (void) nfsm_strtom(nd, target, slen);
2737         if ((nd->nd_flag & ND_NFSV2) != 0)
2738                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2739         if ((error = nfscl_request(nd, dvp, p, cred, dstuff)) != 0)
2740                 return (error);
2741         if ((nd->nd_flag & ND_NFSV4) != 0)
2742                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2743         if (error == 0 && (nd->nd_flag & ND_NFSV3) != 0) {
2744                 if (nd->nd_repstat == 0)
2745                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2746                 if (error == 0)
2747                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2748                             NULL, dstuff);
2749         }
2750         if (error == 0 && nd->nd_repstat != 0)
2751                 error = nd->nd_repstat;
2752         m_freem(nd->nd_mrep);
2753         /*
2754          * Kludge: treat EEXIST as success, assuming it answers a retry.
2755          * This applies only when vfs.nfs.ignore_eexist is set, and never
2756          * to NFSv4.1 or later minor versions, since sessions should
2757          * guarantee "exactly once" RPC semantics there.
2758          */
2759         if (error == EEXIST && nfsignore_eexist != 0 &&
2760             !(NFSHASNFSV4(nmp) && nmp->nm_minorvers != 0))
2761                 error = 0;
2762         return (error);
2763 }
2764
2765 /*
2766  * nfs make dir rpc
      *
      * Builds and sends an NFSPROC_MKDIR request for "name" (namelen bytes) in
      * directory dvp; vap is handed to nfscl_fillsattr() for the request.
      * *nfhpp is reset to NULL and *attrflagp and *dattrflagp to 0 on entry.
      * Reply parsing passes nfhpp, nnap and attrflagp to nfscl_mtofh(), and
      * dnap and dattrflagp to nfscl_wcc_data(); the NFSv4 path additionally
      * loads dnap with nfsm_loadattr() and then sets *dattrflagp to 1.
      * Returns ENAMETOOLONG when namelen > NFS_MAXNAMLEN, otherwise the error
      * from nfscl_request() or from reply parsing, or nd->nd_repstat (subject
      * to the EEXIST kludge at the end).
2767  */
2768 int
2769 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2770     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2771     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2772     int *dattrflagp, void *dstuff)
2773 {
2774         u_int32_t *tl;
2775         struct nfsrv_descript nfsd, *nd = &nfsd;
2776         nfsattrbit_t attrbits;
2777         int error = 0;
2778         struct nfsfh *fhp;
2779         struct nfsmount *nmp;
2780
2781         *nfhpp = NULL;
2782         *attrflagp = 0;
2783         *dattrflagp = 0;
2784         nmp = VFSTONFS(dvp->v_mount);
             /* Directory file handle, used for the NFSv4 PutFH built below. */
2785         fhp = VTONFS(dvp)->n_fhp;
2786         if (namelen > NFS_MAXNAMLEN)
2787                 return (ENAMETOOLONG);
2788         NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp);
             /* NFSv4 only: the object type (NFDIR) is built before the name. */
2789         if (nd->nd_flag & ND_NFSV4) {
2790                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2791                 *tl = txdr_unsigned(NFDIR);
2792         }
2793         (void) nfsm_strtom(nd, name, namelen);
2794         nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
             /*
              * NFSv4 only: append GetFH and Getattr, then PutFH of the
              * directory handle followed by another Getattr, so that the
              * directory attributes come back in the same reply (they are
              * loaded into dnap below).
              */
2795         if (nd->nd_flag & ND_NFSV4) {
2796                 NFSGETATTR_ATTRBIT(&attrbits);
2797                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2798                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2799                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2800                 (void) nfsrv_putattrbit(nd, &attrbits);
2801                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2802                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2803                 (void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0);
2804                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2805                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2806                 (void) nfsrv_putattrbit(nd, &attrbits);
2807         }
2808         error = nfscl_request(nd, dvp, p, cred, dstuff);
             /*
              * Early return: the m_freem() at nfsmout is not reached here.
              * NOTE(review): presumably nfscl_request() leaves no reply
              * mbuf chain to free when it fails - confirm.
              */
2809         if (error)
2810                 return (error);
2811         if (nd->nd_flag & ND_NFSV4)
2812                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
             /* The new object is only parsed out of a successful reply. */
2813         if (!nd->nd_repstat && !error) {
2814                 if (nd->nd_flag & ND_NFSV4) {
                             /*
                              * Step over five XDR words, then the attribute
                              * bitmap that follows them.
                              * NOTE(review): the meaning of these five words
                              * is not visible here - confirm against the
                              * NFSv4 CREATE reply layout.
                              */
2815                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2816                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2817                 }
2818                 if (!error)
2819                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2820                 if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
2821                         /* Get rid of the PutFH and Getattr status values. */
2822                         NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2823                         /* Load the directory attributes. */
2824                         error = nfsm_loadattr(nd, dnap);
2825                         if (error == 0)
2826                                 *dattrflagp = 1;
2827                 }
2828         }
2829         if ((nd->nd_flag & ND_NFSV3) && !error)
2830                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
             /* A parsing error takes precedence over the server status. */
2831         if (nd->nd_repstat && !error)
2832                 error = nd->nd_repstat;
     /*
      * No explicit goto in this function targets this label.
      * NOTE(review): presumably the NFSM_DISSECT macro jumps here when the
      * reply is too short - confirm against its definition.
      */
2833 nfsmout:
2834         m_freem(nd->nd_mrep);
2835         /*
2836          * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2837          * Only do this if vfs.nfs.ignore_eexist is set.
2838          * Never do this for NFSv4.1 or later minor versions, since sessions
2839          * should guarantee "exactly once" RPC semantics.
2840          */
2841         if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2842             nmp->nm_minorvers == 0))
2843                 error = 0;
2844         return (error);
2845 }
2846
2847 /*
2848  * Remove directory RPC: ask the server to remove "name" from dvp.
2849  */
2850 int
2851 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
2852     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff)
2853 {
2854         struct nfsrv_descript nfsd;
2855         struct nfsrv_descript *nd = &nfsd;
2856         int error = 0;
2857
2858         *dattrflagp = 0;
2859         if (namelen > NFS_MAXNAMLEN)
2860                 return (ENAMETOOLONG);
2861
2862         /* Build and send the request; a request error ends it here. */
2863         NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp);
2864         (void) nfsm_strtom(nd, name, namelen);
2865         if ((error = nfscl_request(nd, dvp, p, cred, dstuff)) != 0)
2866                 return (error);
2867
2868         /* Only NFSv3 and NFSv4 replies are passed to nfscl_wcc_data(). */
2869         if ((nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) != 0)
2870                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2871         if (error == 0 && nd->nd_repstat != 0)
2872                 error = nd->nd_repstat;
2873         m_freem(nd->nd_mrep);
2874         /* Kludge: ENOENT is taken to be the reply to a retry, so succeed. */
2875         return (error == ENOENT ? 0 : error);
2876 }
2877
2878 /*
2879  * Readdir rpc.
2880  * Always returns with either uio_resid unchanged, if you are at the
2881  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
2882  * filled in.
2883  * I felt this would allow caching of directory blocks more easily
2884  * than returning a partially filled block.
2885  * Directory offset cookies:
2886  * Oh my, what to do with them...
2887  * I can think of three ways to deal with them:
2888  * 1 - have the layer above these RPCs maintain a map between logical
2889  *     directory byte offsets and the NFS directory offset cookies
2890  * 2 - pass the opaque directory offset cookies up into userland
2891  *     and let the libc functions deal with them, via the system call
2892  * 3 - return them to userland in the "struct dirent", so future versions
2893  *     of libc can use them and do whatever is necessary to make things work
2894  *     above these rpc calls, in the meantime
2895  * For now, I do #3 by "hiding" the directory offset cookies after the
2896  * d_name field in struct dirent. This is space inside d_reclen that
2897  * will be ignored by anything that doesn't know about them.
2898  * The directory offset cookies are filled in as the last 8 bytes of
2899  * each directory entry, after d_name. Someday, the userland libc
2900  * functions may be able to use these. In the meantime, it satisfies
2901  * OpenBSD's requirements for cookies being returned.
2902  * It expects the directory offset cookie for the read to be in uio_offset
2903  * and returns the one for the next entry after this directory block in
2904  * there, as well.
      *
      * vp is the directory; uiop describes the caller's buffer (a single
      * iovec whose uio_resid is a multiple of DIRBLKSIZ, as asserted below).
      * cookiep is read for the starting cookie and updated as entries are
      * stored. *attrflagp is reset to 0, and nap and attrflagp are passed to
      * nfscl_postop_attr(). If eofp is not NULL, *eofp receives the
      * end-of-directory indication computed at the end.
      * Returns 0, or an error from the request or from reply parsing.
2905  */
2906 int
2907 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
2908     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
2909     int *eofp, void *stuff)
2910 {
2911         int len, left;
2912         struct dirent *dp = NULL;
2913         u_int32_t *tl;
2914         nfsquad_t cookie, ncookie;
2915         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2916         struct nfsnode *dnp = VTONFS(vp);
2917         struct nfsvattr nfsva;
2918         struct nfsrv_descript nfsd, *nd = &nfsd;
2919         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2920         int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
2921         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
2922         char *cp;
2923         nfsattrbit_t attrbits, dattrbits;
2924         u_int32_t rderr, *tl2 = NULL;
2925         size_t tresid;
2926
2927         KASSERT(uiop->uio_iovcnt == 1 &&
2928             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
2929             ("nfs readdirrpc bad uio"));
2930         ncookie.lval[0] = ncookie.lval[1] = 0;
2931         /*
2932          * There is no point in reading a lot more than uio_resid, however
2933          * adding one additional DIRBLKSIZ makes sense. Since uio_resid
2934          * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
2935          * will never make readsize > nm_readdirsize.
2936          */
2937         readsize = nmp->nm_readdirsize;
2938         if (readsize > uiop->uio_resid)
2939                 readsize = uiop->uio_resid + DIRBLKSIZ;
2940
2941         *attrflagp = 0;
2942         if (eofp)
2943                 *eofp = 0;
             /* Starting resid, compared later to see if anything was stored. */
2944         tresid = uiop->uio_resid;
2945         cookie.lval[0] = cookiep->nfsuquad[0];
2946         cookie.lval[1] = cookiep->nfsuquad[1];
             /* nfsmout frees nd_mrep only when it is not NULL. */
2947         nd->nd_mrep = NULL;
2948
2949         /*
2950          * For NFSv4, first create the "." and ".." entries.
2951          */
2952         if (NFSHASNFSV4(nmp)) {
2953                 reqsize = 6 * NFSX_UNSIGNED;
2954                 NFSGETATTR_ATTRBIT(&dattrbits);
2955                 NFSZERO_ATTRBIT(&attrbits);
2956                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
2957                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
2958                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
2959                     NFSATTRBIT_MOUNTEDONFILEID)) {
2960                         NFSSETBIT_ATTRBIT(&attrbits,
2961                             NFSATTRBIT_MOUNTEDONFILEID);
2962                         gotmnton = 1;
2963                 } else {
2964                         /*
2965                          * Must fake it. Use the fileno, except when the
2966                          * fsid is != to that of the directory. For that
2967                          * case, generate a fake fileno that is not the same.
2968                          */
2969                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
2970                         gotmnton = 0;
2971                 }
2972
2973                 /*
2974                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
2975                  */
2976                 if (uiop->uio_offset == 0) {
2977                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
2978                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2979                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2980                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2981                         (void) nfsrv_putattrbit(nd, &attrbits);
2982                         error = nfscl_request(nd, vp, p, cred, stuff);
2983                         if (error)
2984                             return (error);
2985                         dotfileid = 0;  /* Fake out the compiler. */
                             /* The attributes loaded first give the "." fileid. */
2986                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
2987                             error = nfsm_loadattr(nd, &nfsva);
2988                             if (error != 0)
2989                                 goto nfsmout;
2990                             dotfileid = nfsva.na_fileid;
2991                         }
2992                         if (nd->nd_repstat == 0) {
2993                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2994                             len = fxdr_unsigned(int, *(tl + 4));
                                 /*
                                  * Only the length of the returned file handle
                                  * is checked; nfsm_advance() is then called
                                  * over its rounded-up size.
                                  */
2995                             if (len > 0 && len <= NFSX_V4FHMAX)
2996                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
2997                             else
2998                                 error = EPERM;
2999                             if (!error) {
3000                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3001                                 nfsva.na_mntonfileno = UINT64_MAX;
3002                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3003                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3004                                     NULL, NULL, NULL, p, cred);
                                     /*
                                      * If the parent attributes cannot be
                                      * loaded, ".." gets the "." fileid.
                                      */
3005                                 if (error) {
3006                                     dotdotfileid = dotfileid;
3007                                 } else if (gotmnton) {
3008                                     if (nfsva.na_mntonfileno != UINT64_MAX)
3009                                         dotdotfileid = nfsva.na_mntonfileno;
3010                                     else
3011                                         dotdotfileid = nfsva.na_fileid;
3012                                 } else if (nfsva.na_filesid[0] ==
3013                                     dnp->n_vattr.na_filesid[0] &&
3014                                     nfsva.na_filesid[1] ==
3015                                     dnp->n_vattr.na_filesid[1]) {
3016                                     dotdotfileid = nfsva.na_fileid;
3017                                 } else {
                                         /* Fake fileno that differs from the fileid. */
3018                                     do {
3019                                         fakefileno--;
3020                                     } while (fakefileno ==
3021                                         nfsva.na_fileid);
3022                                     dotdotfileid = fakefileno;
3023                                 }
3024                             }
3025                         } else if (nd->nd_repstat == NFSERR_NOENT) {
3026                             /*
3027                              * Lookupp returns NFSERR_NOENT when we are
3028                              * at the root, so just use the current dir.
3029                              */
3030                             nd->nd_repstat = 0;
3031                             dotdotfileid = dotfileid;
3032                         } else {
3033                             error = nd->nd_repstat;
3034                         }
3035                         m_freem(nd->nd_mrep);
3036                         if (error)
3037                             return (error);
3038                         nd->nd_mrep = NULL;
                             /* Hand craft the "." entry at the start of the buffer. */
3039                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3040                         dp->d_pad0 = dp->d_pad1 = 0;
3041                         dp->d_off = 0;
3042                         dp->d_type = DT_DIR;
3043                         dp->d_fileno = dotfileid;
3044                         dp->d_namlen = 1;
3045                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
3046                         dp->d_name[0] = '.';
3047                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3048                         /*
3049                          * Just make these offset cookie 0.
3050                          */
3051                         tl = (u_int32_t *)&dp->d_name[8];
3052                         *tl++ = 0;
3053                         *tl = 0;
3054                         blksiz += dp->d_reclen;
3055                         uiop->uio_resid -= dp->d_reclen;
3056                         uiop->uio_offset += dp->d_reclen;
3057                         uiop->uio_iov->iov_base =
3058                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3059                         uiop->uio_iov->iov_len -= dp->d_reclen;
                             /* Then the ".." entry, right after it. */
3060                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3061                         dp->d_pad0 = dp->d_pad1 = 0;
3062                         dp->d_off = 0;
3063                         dp->d_type = DT_DIR;
3064                         dp->d_fileno = dotdotfileid;
3065                         dp->d_namlen = 2;
3066                         *((uint64_t *)dp->d_name) = 0;
3067                         dp->d_name[0] = '.';
3068                         dp->d_name[1] = '.';
3069                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3070                         /*
3071                          * Just make these offset cookie 0.
3072                          */
3073                         tl = (u_int32_t *)&dp->d_name[8];
3074                         *tl++ = 0;
3075                         *tl = 0;
3076                         blksiz += dp->d_reclen;
3077                         uiop->uio_resid -= dp->d_reclen;
3078                         uiop->uio_offset += dp->d_reclen;
3079                         uiop->uio_iov->iov_base =
3080                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3081                         uiop->uio_iov->iov_len -= dp->d_reclen;
3082                 }
                     /* Added only now, so the Lookupp Getattr above did not ask for it. */
3083                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
3084         } else {
3085                 reqsize = 5 * NFSX_UNSIGNED;
3086         }
3087
3088         /*
3089          * Loop around doing readdir rpc's of size readsize.
3090          * The stopping criteria is EOF or buffer full.
3091          */
3092         while (more_dirs && bigenough) {
3093                 *attrflagp = 0;
3094                 NFSCL_REQSTART(nd, NFSPROC_READDIR, vp);
3095                 if (nd->nd_flag & ND_NFSV2) {
3096                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3097                         *tl++ = cookie.lval[1];
3098                         *tl = txdr_unsigned(readsize);
3099                 } else {
3100                         NFSM_BUILD(tl, u_int32_t *, reqsize);
3101                         *tl++ = cookie.lval[0];
3102                         *tl++ = cookie.lval[1];
                             /*
                              * A zero cookie is sent with a zero verifier;
                              * otherwise the verifier saved in the nfsnode
                              * is sent.
                              */
3103                         if (cookie.qval == 0) {
3104                                 *tl++ = 0;
3105                                 *tl++ = 0;
3106                         } else {
3107                                 NFSLOCKNODE(dnp);
3108                                 *tl++ = dnp->n_cookieverf.nfsuquad[0];
3109                                 *tl++ = dnp->n_cookieverf.nfsuquad[1];
3110                                 NFSUNLOCKNODE(dnp);
3111                         }
3112                         if (nd->nd_flag & ND_NFSV4) {
3113                                 *tl++ = txdr_unsigned(readsize);
3114                                 *tl = txdr_unsigned(readsize);
3115                                 (void) nfsrv_putattrbit(nd, &attrbits);
3116                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3117                                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3118                                 (void) nfsrv_putattrbit(nd, &dattrbits);
3119                         } else {
3120                                 *tl = txdr_unsigned(readsize);
3121                         }
3122                 }
3123                 error = nfscl_request(nd, vp, p, cred, stuff);
3124                 if (error)
3125                         return (error);
3126                 if (!(nd->nd_flag & ND_NFSV2)) {
3127                         if (nd->nd_flag & ND_NFSV3)
3128                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3129                                     stuff);
                             /*
                              * Save the cookie verifier from the reply in the
                              * nfsnode, under the node lock.
                              */
3130                         if (!nd->nd_repstat && !error) {
3131                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
3132                                 NFSLOCKNODE(dnp);
3133                                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3134                                 dnp->n_cookieverf.nfsuquad[1] = *tl;
3135                                 NFSUNLOCKNODE(dnp);
3136                         }
3137                 }
3138                 if (nd->nd_repstat || error) {
3139                         if (!error)
3140                                 error = nd->nd_repstat;
3141                         goto nfsmout;
3142                 }
3143                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3144                 more_dirs = fxdr_unsigned(int, *tl);
                     /* A reply with no entries at all stops the outer loop. */
3145                 if (!more_dirs)
3146                         tryformoredirs = 0;
3147
3148                 /* loop through the dir entries, doctoring them to 4bsd form */
3149                 while (more_dirs && bigenough) {
3150                         if (nd->nd_flag & ND_NFSV4) {
3151                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3152                                 ncookie.lval[0] = *tl++;
3153                                 ncookie.lval[1] = *tl++;
3154                                 len = fxdr_unsigned(int, *tl);
3155                         } else if (nd->nd_flag & ND_NFSV3) {
3156                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3157                                 nfsva.na_fileid = fxdr_hyper(tl);
3158                                 tl += 2;
3159                                 len = fxdr_unsigned(int, *tl);
3160                         } else {
3161                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3162                                 nfsva.na_fileid = fxdr_unsigned(uint64_t,
3163                                     *tl++);
3164                                 len = fxdr_unsigned(int, *tl);
3165                         }
3166                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3167                                 error = EBADRPC;
3168                                 goto nfsmout;
3169                         }
                             /* The name is padded to a multiple of 8 bytes. */
3170                         tlen = roundup2(len, 8);
3171                         if (tlen == len)
3172                                 tlen += 8;  /* To ensure null termination. */
3173                         left = DIRBLKSIZ - blksiz;
                             /*
                              * Entry does not fit in the rest of the current
                              * DIRBLKSIZ block: zero the remainder, add it to
                              * the previous record and start a new block.
                              */
3174                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3175                                 NFSBZERO(uiop->uio_iov->iov_base, left);
3176                                 dp->d_reclen += left;
3177                                 uiop->uio_iov->iov_base =
3178                                     (char *)uiop->uio_iov->iov_base + left;
3179                                 uiop->uio_iov->iov_len -= left;
3180                                 uiop->uio_resid -= left;
3181                                 uiop->uio_offset += left;
3182                                 blksiz = 0;
3183                         }
                             /* Not enough room left in the caller buffer. */
3184                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3185                             uiop->uio_resid)
3186                                 bigenough = 0;
3187                         if (bigenough) {
3188                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3189                                 dp->d_pad0 = dp->d_pad1 = 0;
3190                                 dp->d_off = 0;
3191                                 dp->d_namlen = len;
3192                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
3193                                     NFSX_HYPER;
3194                                 dp->d_type = DT_UNKNOWN;
3195                                 blksiz += dp->d_reclen;
3196                                 if (blksiz == DIRBLKSIZ)
3197                                         blksiz = 0;
                                     /*
                                      * Account for the fixed dirent header.
                                      * NOTE(review): presumably nfsm_mbufuio()
                                      * then copies the len name bytes and
                                      * advances uiop itself - confirm.
                                      */
3198                                 uiop->uio_resid -= DIRHDSIZ;
3199                                 uiop->uio_offset += DIRHDSIZ;
3200                                 uiop->uio_iov->iov_base =
3201                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3202                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
3203                                 error = nfsm_mbufuio(nd, uiop, len);
3204                                 if (error)
3205                                         goto nfsmout;
3206                                 cp = uiop->uio_iov->iov_base;
3207                                 tlen -= len;
3208                                 NFSBZERO(cp, tlen);
3209                                 cp += tlen;     /* points to cookie storage */
                                     /* tl2 is filled in once the cookie is parsed. */
3210                                 tl2 = (u_int32_t *)cp;
3211                                 uiop->uio_iov->iov_base =
3212                                     (char *)uiop->uio_iov->iov_base + tlen +
3213                                     NFSX_HYPER;
3214                                 uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3215                                 uiop->uio_resid -= tlen + NFSX_HYPER;
3216                                 uiop->uio_offset += (tlen + NFSX_HYPER);
3217                         } else {
                                     /*
                                      * Entry is not stored; nfsm_advance() is
                                      * called over the name and the rest of the
                                      * entry is still parsed below.
                                      */
3218                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3219                                 if (error)
3220                                         goto nfsmout;
3221                         }
3222                         if (nd->nd_flag & ND_NFSV4) {
3223                                 rderr = 0;
3224                                 nfsva.na_mntonfileno = UINT64_MAX;
3225                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3226                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3227                                     NULL, NULL, &rderr, p, cred);
3228                                 if (error)
3229                                         goto nfsmout;
3230                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3231                         } else if (nd->nd_flag & ND_NFSV3) {
3232                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3233                                 ncookie.lval[0] = *tl++;
3234                                 ncookie.lval[1] = *tl++;
3235                         } else {
3236                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3237                                 ncookie.lval[0] = 0;
3238                                 ncookie.lval[1] = *tl++;
3239                         }
3240                         if (bigenough) {
3241                             if (nd->nd_flag & ND_NFSV4) {
                                     /* A per-entry attribute error gives fileno 0. */
3242                                 if (rderr) {
3243                                     dp->d_fileno = 0;
3244                                 } else {
3245                                     if (gotmnton) {
3246                                         if (nfsva.na_mntonfileno != UINT64_MAX)
3247                                             dp->d_fileno = nfsva.na_mntonfileno;
3248                                         else
3249                                             dp->d_fileno = nfsva.na_fileid;
3250                                     } else if (nfsva.na_filesid[0] ==
3251                                         dnp->n_vattr.na_filesid[0] &&
3252                                         nfsva.na_filesid[1] ==
3253                                         dnp->n_vattr.na_filesid[1]) {
3254                                         dp->d_fileno = nfsva.na_fileid;
3255                                     } else {
3256                                         do {
3257                                             fakefileno--;
3258                                         } while (fakefileno ==
3259                                             nfsva.na_fileid);
3260                                         dp->d_fileno = fakefileno;
3261                                     }
3262                                     dp->d_type = vtonfs_dtype(nfsva.na_type);
3263                                 }
3264                             } else {
3265                                 dp->d_fileno = nfsva.na_fileid;
3266                             }
                                 /*
                                  * Store the cookie after the name, and make it
                                  * both the caller cookie and the one used for
                                  * the next request.
                                  */
3267                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3268                                 ncookie.lval[0];
3269                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3270                                 ncookie.lval[1];
3271                         }
                             /*
                              * In all three versions tl is left on the last
                              * dissected word, which says whether another
                              * entry follows.
                              */
3272                         more_dirs = fxdr_unsigned(int, *tl);
3273                 }
3274                 /*
3275                  * If at end of rpc data, get the eof boolean
3276                  */
3277                 if (!more_dirs) {
3278                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3279                         eof = fxdr_unsigned(int, *tl);
                             /* Issue another request unless the server said eof. */
3280                         if (tryformoredirs)
3281                                 more_dirs = !eof;
3282                         if (nd->nd_flag & ND_NFSV4) {
3283                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3284                                     stuff);
3285                                 if (error)
3286                                         goto nfsmout;
3287                         }
3288                 }
3289                 m_freem(nd->nd_mrep);
3290                 nd->nd_mrep = NULL;
3291         }
3292         /*
3293          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3294          * by increasing d_reclen for the last record.
3295          */
3296         if (blksiz > 0) {
3297                 left = DIRBLKSIZ - blksiz;
3298                 NFSBZERO(uiop->uio_iov->iov_base, left);
3299                 dp->d_reclen += left;
3300                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3301                     left;
3302                 uiop->uio_iov->iov_len -= left;
3303                 uiop->uio_resid -= left;
3304                 uiop->uio_offset += left;
3305         }
3306
3307         /*
3308          * If returning no data, assume end of file.
3309          * If not bigenough, return not end of file, since you aren't
3310          *    returning all the data
3311          * Otherwise, return the eof flag from the server.
3312          */
3313         if (eofp) {
3314                 if (tresid == ((size_t)(uiop->uio_resid)))
3315                         *eofp = 1;
3316                 else if (!bigenough)
3317                         *eofp = 0;
3318                 else
3319                         *eofp = eof;
3320         }
3321
3322         /*
3323          * Add extra empty records to any remaining DIRBLKSIZ chunks.
              * Each one is a zeroed DIRBLKSIZ record (so d_fileno is 0) that
              * carries the last cookie at d_name[4].
3324          */
3325         while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
3326                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3327                 NFSBZERO(dp, DIRBLKSIZ);
3328                 dp->d_type = DT_UNKNOWN;
3329                 tl = (u_int32_t *)&dp->d_name[4];
3330                 *tl++ = cookie.lval[0];
3331                 *tl = cookie.lval[1];
3332                 dp->d_reclen = DIRBLKSIZ;
3333                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3334                     DIRBLKSIZ;
3335                 uiop->uio_iov->iov_len -= DIRBLKSIZ;
3336                 uiop->uio_resid -= DIRBLKSIZ;
3337                 uiop->uio_offset += DIRBLKSIZ;
3338         }
3339
     /*
      * Reached by fall-through and by the explicit gotos above.
      * NOTE(review): presumably the NFSM_DISSECT macro also jumps here when
      * the reply is too short - confirm against its definition.
      */
3340 nfsmout:
3341         if (nd->nd_mrep != NULL)
3342                 m_freem(nd->nd_mrep);
3343         return (error);
3344 }
3345
3346 #ifndef APPLE
3347 /*
3348  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3349  * (Also used for NFS V4 when mount flag set.)
3350  * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
      *
      * Fills the single iovec described by uiop with directory records.  Each
      * record's d_reclen includes NFSX_HYPER extra bytes, placed after the
      * zero padded name, that hold the entry's directory offset cookie.  The
      * last record of each DIRBLKSIZ block has its d_reclen stretched so that
      * no record straddles a DIRBLKSIZ boundary.
      *
      * vp        - directory being read
      * uiop      - destination; one iovec, uio_resid a multiple of DIRBLKSIZ
      *             (asserted below)
      * cookiep   - in: cookie to start reading at; out: cookie of the last
      *             entry copied into the buffer
      * cred, p, stuff - handed to nfscl_request() and the attribute helpers
      * nap, attrflagp - filled in through nfscl_postop_attr(); *attrflagp is
      *             cleared first
      * eofp      - optional; cleared on entry, set near the end of the function
      *
      * For entries that arrive with a file handle (and, for V3, attributes) the
      * attributes are loaded via nfscl_loadattrcache() and the name may be
      * entered into the name cache with cache_enter_time().
      *
      * Returns 0 on success, otherwise an error value (either local or the
      * nd_repstat status of the reply).
3351  */
3352 int
3353 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3354     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3355     int *eofp, void *stuff)
3356 {
3357         int len, left;
3358         struct dirent *dp = NULL;
3359         u_int32_t *tl;
3360         vnode_t newvp = NULLVP;
3361         struct nfsrv_descript nfsd, *nd = &nfsd;
3362         struct nameidata nami, *ndp = &nami;
3363         struct componentname *cnp = &ndp->ni_cnd;
3364         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3365         struct nfsnode *dnp = VTONFS(vp), *np;
3366         struct nfsvattr nfsva;
3367         struct nfsfh *nfhp;
3368         nfsquad_t cookie, ncookie;
3369         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3370         int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3371         int isdotdot = 0, unlocknewvp = 0;
3372         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3373         u_int64_t fileno = 0;
3374         char *cp;
3375         nfsattrbit_t attrbits, dattrbits;
3376         size_t tresid;
3377         u_int32_t *tl2 = NULL, rderr;
3378         struct timespec dctime;
3379
3380         KASSERT(uiop->uio_iovcnt == 1 &&
3381             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3382             ("nfs readdirplusrpc bad uio"));
3383         ncookie.lval[0] = ncookie.lval[1] = 0;
             /* dctime stays zero until a ctime for the directory is seen. */
3384         timespecclear(&dctime);
3385         *attrflagp = 0;
3386         if (eofp != NULL)
3387                 *eofp = 0;
3388         ndp->ni_dvp = vp;
3389         nd->nd_mrep = NULL;
3390         cookie.lval[0] = cookiep->nfsuquad[0];
3391         cookie.lval[1] = cookiep->nfsuquad[1];
             /* Remember the starting residual to tell later if data was added. */
3392         tresid = uiop->uio_resid;
3393
3394         /*
3395          * For NFSv4, first create the "." and ".." entries.
3396          */
3397         if (NFSHASNFSV4(nmp)) {
3398                 NFSGETATTR_ATTRBIT(&dattrbits);
3399                 NFSZERO_ATTRBIT(&attrbits);
3400                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3401                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3402                     NFSATTRBIT_MOUNTEDONFILEID)) {
3403                         NFSSETBIT_ATTRBIT(&attrbits,
3404                             NFSATTRBIT_MOUNTEDONFILEID);
3405                         gotmnton = 1;
3406                 } else {
3407                         /*
3408                          * Must fake it. Use the fileno, except when the
3409                          * fsid is != to that of the directory. For that
3410                          * case, generate a fake fileno that is not the same.
3411                          */
3412                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3413                         gotmnton = 0;
3414                 }
3415
3416                 /*
3417                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3418                  */
3419                 if (uiop->uio_offset == 0) {
                             /*
                              * Issue a Lookupp request with Getfh and Getattr
                              * ops appended.  The reply is parsed below as:
                              * attributes of this directory (dotfileid and
                              * dctime), a file handle that is skipped over,
                              * then the attributes used for dotdotfileid.
                              */
3420                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
3421                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3422                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
3423                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3424                         (void) nfsrv_putattrbit(nd, &attrbits);
3425                         error = nfscl_request(nd, vp, p, cred, stuff);
3426                         if (error)
3427                             return (error);
3428                         dotfileid = 0;  /* Fake out the compiler. */
3429                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3430                             error = nfsm_loadattr(nd, &nfsva);
3431                             if (error != 0)
3432                                 goto nfsmout;
3433                             dctime = nfsva.na_ctime;
3434                             dotfileid = nfsva.na_fileid;
3435                         }
3436                         if (nd->nd_repstat == 0) {
3437                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
                                 /* Fifth word is the file handle length. */
3438                             len = fxdr_unsigned(int, *(tl + 4));
3439                             if (len > 0 && len <= NFSX_V4FHMAX)
3440                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3441                             else
3442                                 error = EPERM;
3443                             if (!error) {
3444                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
                                     /* UINT64_MAX marks "no mounted-on fileid". */
3445                                 nfsva.na_mntonfileno = UINT64_MAX;
3446                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3447                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3448                                     NULL, NULL, NULL, p, cred);
3449                                 if (error) {
3450                                     dotdotfileid = dotfileid;
3451                                 } else if (gotmnton) {
3452                                     if (nfsva.na_mntonfileno != UINT64_MAX)
3453                                         dotdotfileid = nfsva.na_mntonfileno;
3454                                     else
3455                                         dotdotfileid = nfsva.na_fileid;
3456                                 } else if (nfsva.na_filesid[0] ==
3457                                     dnp->n_vattr.na_filesid[0] &&
3458                                     nfsva.na_filesid[1] ==
3459                                     dnp->n_vattr.na_filesid[1]) {
3460                                     dotdotfileid = nfsva.na_fileid;
3461                                 } else {
                                         /*
                                          * Different fsid: count down from
                                          * UINT64_MAX, skipping the real fileid.
                                          */
3462                                     do {
3463                                         fakefileno--;
3464                                     } while (fakefileno ==
3465                                         nfsva.na_fileid);
3466                                     dotdotfileid = fakefileno;
3467                                 }
3468                             }
3469                         } else if (nd->nd_repstat == NFSERR_NOENT) {
3470                             /*
3471                              * Lookupp returns NFSERR_NOENT when we are
3472                              * at the root, so just use the current dir.
3473                              */
3474                             nd->nd_repstat = 0;
3475                             dotdotfileid = dotfileid;
3476                         } else {
3477                             error = nd->nd_repstat;
3478                         }
3479                         m_freem(nd->nd_mrep);
3480                         if (error)
3481                             return (error);
3482                         nd->nd_mrep = NULL;
                             /* Hand build the "." record at the buffer start. */
3483                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3484                         dp->d_pad0 = dp->d_pad1 = 0;
3485                         dp->d_off = 0;
3486                         dp->d_type = DT_DIR;
3487                         dp->d_fileno = dotfileid;
3488                         dp->d_namlen = 1;
3489                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
3490                         dp->d_name[0] = '.';
3491                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3492                         /*
3493                          * Just make these offset cookie 0.
3494                          */
3495                         tl = (u_int32_t *)&dp->d_name[8];
3496                         *tl++ = 0;
3497                         *tl = 0;
3498                         blksiz += dp->d_reclen;
3499                         uiop->uio_resid -= dp->d_reclen;
3500                         uiop->uio_offset += dp->d_reclen;
3501                         uiop->uio_iov->iov_base =
3502                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3503                         uiop->uio_iov->iov_len -= dp->d_reclen;
                             /* Followed by the ".." record, built the same way. */
3504                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3505                         dp->d_pad0 = dp->d_pad1 = 0;
3506                         dp->d_off = 0;
3507                         dp->d_type = DT_DIR;
3508                         dp->d_fileno = dotdotfileid;
3509                         dp->d_namlen = 2;
3510                         *((uint64_t *)dp->d_name) = 0;
3511                         dp->d_name[0] = '.';
3512                         dp->d_name[1] = '.';
3513                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3514                         /*
3515                          * Just make these offset cookie 0.
3516                          */
3517                         tl = (u_int32_t *)&dp->d_name[8];
3518                         *tl++ = 0;
3519                         *tl = 0;
3520                         blksiz += dp->d_reclen;
3521                         uiop->uio_resid -= dp->d_reclen;
3522                         uiop->uio_offset += dp->d_reclen;
3523                         uiop->uio_iov->iov_base =
3524                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3525                         uiop->uio_iov->iov_len -= dp->d_reclen;
3526                 }
                     /* From here on attrbits is the per-entry attribute request. */
3527                 NFSREADDIRPLUS_ATTRBIT(&attrbits);
3528                 if (gotmnton)
3529                         NFSSETBIT_ATTRBIT(&attrbits,
3530                             NFSATTRBIT_MOUNTEDONFILEID);
3531         }
3532
3533         /*
3534          * Loop around doing readdir rpc's of size nm_readdirsize.
3535          * The stopping criteria is EOF or buffer full.
3536          */
3537         while (more_dirs && bigenough) {
3538                 *attrflagp = 0;
                     /*
                      * Request arguments: the cookie, the cookie verifier (zero
                      * when starting at cookie 0) and nm_readdirsize twice.
                      */
3539                 NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp);
3540                 NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3541                 *tl++ = cookie.lval[0];
3542                 *tl++ = cookie.lval[1];
3543                 if (cookie.qval == 0) {
3544                         *tl++ = 0;
3545                         *tl++ = 0;
3546                 } else {
3547                         NFSLOCKNODE(dnp);
3548                         *tl++ = dnp->n_cookieverf.nfsuquad[0];
3549                         *tl++ = dnp->n_cookieverf.nfsuquad[1];
3550                         NFSUNLOCKNODE(dnp);
3551                 }
3552                 *tl++ = txdr_unsigned(nmp->nm_readdirsize);
3553                 *tl = txdr_unsigned(nmp->nm_readdirsize);
3554                 if (nd->nd_flag & ND_NFSV4) {
                             /*
                              * V4: per-entry attribute mask, then a Getattr op
                              * using the dattrbits mask.
                              */
3555                         (void) nfsrv_putattrbit(nd, &attrbits);
3556                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3557                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3558                         (void) nfsrv_putattrbit(nd, &dattrbits);
3559                 }
3560                 error = nfscl_request(nd, vp, p, cred, stuff);
3561                 if (error)
3562                         return (error);
3563                 if (nd->nd_flag & ND_NFSV3)
3564                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3565                 if (nd->nd_repstat || error) {
3566                         if (!error)
3567                                 error = nd->nd_repstat;
3568                         goto nfsmout;
3569                 }
3570                 if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3571                         dctime = nap->na_ctime;
                     /* Reply starts with the cookie verifier and a "more" flag. */
3572                 NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3573                 NFSLOCKNODE(dnp);
3574                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3575                 dnp->n_cookieverf.nfsuquad[1] = *tl++;
3576                 NFSUNLOCKNODE(dnp);
3577                 more_dirs = fxdr_unsigned(int, *tl);
3578                 if (!more_dirs)
3579                         tryformoredirs = 0;
3580
3581                 /* loop through the dir entries, doctoring them to 4bsd form */
3582                 while (more_dirs && bigenough) {
                             /*
                              * Two words then the name length: for V4 the two
                              * words are the entry's cookie, for V3 its fileid.
                              */
3583                         NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3584                         if (nd->nd_flag & ND_NFSV4) {
3585                                 ncookie.lval[0] = *tl++;
3586                                 ncookie.lval[1] = *tl++;
3587                         } else {
3588                                 fileno = fxdr_hyper(tl);
3589                                 tl += 2;
3590                         }
3591                         len = fxdr_unsigned(int, *tl);
3592                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3593                                 error = EBADRPC;
3594                                 goto nfsmout;
3595                         }
3596                         tlen = roundup2(len, 8);
3597                         if (tlen == len)
3598                                 tlen += 8;  /* To ensure null termination. */
3599                         left = DIRBLKSIZ - blksiz;
                             /*
                              * Entry will not fit in what is left of this block:
                              * zero the remainder, charge it to the previous
                              * record and start a new block.
                              */
3600                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3601                                 NFSBZERO(uiop->uio_iov->iov_base, left);
3602                                 dp->d_reclen += left;
3603                                 uiop->uio_iov->iov_base =
3604                                     (char *)uiop->uio_iov->iov_base + left;
3605                                 uiop->uio_iov->iov_len -= left;
3606                                 uiop->uio_resid -= left;
3607                                 uiop->uio_offset += left;
3608                                 blksiz = 0;
3609                         }
3610                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3611                             uiop->uio_resid)
3612                                 bigenough = 0;
3613                         if (bigenough) {
3614                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3615                                 dp->d_pad0 = dp->d_pad1 = 0;
3616                                 dp->d_off = 0;
3617                                 dp->d_namlen = len;
3618                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
3619                                     NFSX_HYPER;
3620                                 dp->d_type = DT_UNKNOWN;
3621                                 blksiz += dp->d_reclen;
3622                                 if (blksiz == DIRBLKSIZ)
3623                                         blksiz = 0;
3624                                 uiop->uio_resid -= DIRHDSIZ;
3625                                 uiop->uio_offset += DIRHDSIZ;
3626                                 uiop->uio_iov->iov_base =
3627                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3628                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
                                     /* The name is copied directly into the buffer. */
3629                                 cnp->cn_nameptr = uiop->uio_iov->iov_base;
3630                                 cnp->cn_namelen = len;
3631                                 NFSCNHASHZERO(cnp);
3632                                 error = nfsm_mbufuio(nd, uiop, len);
3633                                 if (error)
3634                                         goto nfsmout;
3635                                 cp = uiop->uio_iov->iov_base;
3636                                 tlen -= len;
3637                                 NFSBZERO(cp, tlen);
3638                                 cp += tlen;     /* points to cookie storage */
3639                                 tl2 = (u_int32_t *)cp;
3640                                 if (len == 2 && cnp->cn_nameptr[0] == '.' &&
3641                                     cnp->cn_nameptr[1] == '.')
3642                                         isdotdot = 1;
3643                                 else
3644                                         isdotdot = 0;
3645                                 uiop->uio_iov->iov_base =
3646                                     (char *)uiop->uio_iov->iov_base + tlen +
3647                                     NFSX_HYPER;
3648                                 uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3649                                 uiop->uio_resid -= tlen + NFSX_HYPER;
3650                                 uiop->uio_offset += (tlen + NFSX_HYPER);
3651                         } else {
                                     /* No room: step over the name in the reply. */
3652                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3653                                 if (error)
3654                                         goto nfsmout;
3655                         }
3656                         nfhp = NULL;
3657                         if (nd->nd_flag & ND_NFSV3) {
                                     /*
                                      * V3: cookie, optional attributes, then an
                                      * optional file handle.
                                      */
3658                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3659                                 ncookie.lval[0] = *tl++;
3660                                 ncookie.lval[1] = *tl++;
3661                                 attrflag = fxdr_unsigned(int, *tl);
3662                                 if (attrflag) {
3663                                   error = nfsm_loadattr(nd, &nfsva);
3664                                   if (error)
3665                                         goto nfsmout;
3666                                 }
3667                                 NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
3668                                 if (*tl) {
3669                                         error = nfsm_getfh(nd, &nfhp);
3670                                         if (error)
3671                                             goto nfsmout;
3672                                 }
                                     /* A handle without attributes is dropped. */
3673                                 if (!attrflag && nfhp != NULL) {
3674                                         free(nfhp, M_NFSFH);
3675                                         nfhp = NULL;
3676                                 }
3677                         } else {
3678                                 rderr = 0;
                                     /*
                                      * NOTE(review): the "not returned" marker
                                      * used here is 0xffffffff, while the ".."
                                      * setup above uses UINT64_MAX -- confirm
                                      * the difference is intentional.
                                      */
3679                                 nfsva.na_mntonfileno = 0xffffffff;
3680                                 error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
3681                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3682                                     NULL, NULL, &rderr, p, cred);
3683                                 if (error)
3684                                         goto nfsmout;
3685                         }
3686
3687                         if (bigenough) {
                                 /* Choose d_fileno as was done for ".." above. */
3688                             if (nd->nd_flag & ND_NFSV4) {
3689                                 if (rderr) {
3690                                     dp->d_fileno = 0;
3691                                 } else if (gotmnton) {
3692                                     if (nfsva.na_mntonfileno != 0xffffffff)
3693                                         dp->d_fileno = nfsva.na_mntonfileno;
3694                                     else
3695                                         dp->d_fileno = nfsva.na_fileid;
3696                                 } else if (nfsva.na_filesid[0] ==
3697                                     dnp->n_vattr.na_filesid[0] &&
3698                                     nfsva.na_filesid[1] ==
3699                                     dnp->n_vattr.na_filesid[1]) {
3700                                     dp->d_fileno = nfsva.na_fileid;
3701                                 } else {
3702                                     do {
3703                                         fakefileno--;
3704                                     } while (fakefileno ==
3705                                         nfsva.na_fileid);
3706                                     dp->d_fileno = fakefileno;
3707                                 }
3708                             } else {
3709                                 dp->d_fileno = fileno;
3710                             }
                                 /*
                                  * Store the entry's cookie after its name and
                                  * make it the resume point for the next request
                                  * and for the caller.
                                  */
3711                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3712                                 ncookie.lval[0];
3713                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3714                                 ncookie.lval[1];
3715
3716                             if (nfhp != NULL) {
                                     /* Same handle as the directory: reuse vp. */
3717                                 if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
3718                                     dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
3719                                     VREF(vp);
3720                                     newvp = vp;
3721                                     unlocknewvp = 0;
3722                                     free(nfhp, M_NFSFH);
3723                                     np = dnp;
3724                                 } else if (isdotdot != 0) {
3725                                     /*
3726                                      * Skip doing a nfscl_nget() call for "..".
3727                                      * There's a race between acquiring the nfs
3728                                      * node here and lookups that look for the
3729                                      * directory being read (in the parent).
3730                                      * It would try to get a lock on ".." here,
3731                                      * owning the lock on the directory being
3732                                      * read. Lookup will hold the lock on ".."
3733                                      * and try to acquire the lock on the
3734                                      * directory being read.
3735                                      * If the directory is unlocked/relocked,
3736                                      * then there is a LOR with the buflock
3737                                      * vp is relocked.
3738                                      */
3739                                     free(nfhp, M_NFSFH);
3740                                 } else {
                                         /*
                                          * NOTE(review): nfhp is not freed here,
                                          * so nfscl_nget() presumably takes
                                          * ownership of it -- confirm.  A failure
                                          * also leaves error set while the loop
                                          * continues; confirm a later assignment
                                          * always overwrites it before return.
                                          */
3741                                     error = nfscl_nget(vp->v_mount, vp,
3742                                       nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE);
3743                                     if (!error) {
3744                                         newvp = NFSTOV(np);
3745                                         unlocknewvp = 1;
3746                                     }
3747                                 }
3748                                 nfhp = NULL;
3749                                 if (newvp != NULLVP) {
3750                                     error = nfscl_loadattrcache(&newvp,
3751                                         &nfsva, NULL, NULL, 0, 0);
3752                                     if (error) {
3753                                         if (unlocknewvp)
3754                                             vput(newvp);
3755                                         else
3756                                             vrele(newvp);
3757                                         goto nfsmout;
3758                                     }
3759                                     dp->d_type =
3760                                         vtonfs_dtype(np->n_vattr.na_type);
3761                                     ndp->ni_vp = newvp;
3762                                     NFSCNHASH(cnp, HASHINIT);
                                         /*
                                          * Enter the name only when it is short
                                          * enough, the vnode is not the directory
                                          * itself and, for a directory entry, a
                                          * ctime for the parent is known.
                                          */
3763                                     if (cnp->cn_namelen <= NCHNAMLEN &&
3764                                         ndp->ni_dvp != ndp->ni_vp &&
3765                                         (newvp->v_type != VDIR ||
3766                                          dctime.tv_sec != 0)) {
3767                                         cache_enter_time(ndp->ni_dvp,
3768                                             ndp->ni_vp, cnp,
3769                                             &nfsva.na_ctime,
3770                                             newvp->v_type != VDIR ? NULL :
3771                                             &dctime);
3772                                     }
3773                                     if (unlocknewvp)
3774                                         vput(newvp);
3775                                     else
3776                                         vrele(newvp);
3777                                     newvp = NULLVP;
3778                                 }
3779                             }
3780                         } else if (nfhp != NULL) {
3781                             free(nfhp, M_NFSFH);
3782                         }
                             /* Flag saying whether another entry follows. */
3783                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3784                         more_dirs = fxdr_unsigned(int, *tl);
3785                 }
3786                 /*
3787                  * If at end of rpc data, get the eof boolean
3788                  */
3789                 if (!more_dirs) {
3790                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3791                         eof = fxdr_unsigned(int, *tl);
                             /*
                              * Only ask for another reply if this one carried
                              * at least one entry and was not flagged eof.
                              */
3792                         if (tryformoredirs)
3793                                 more_dirs = !eof;
3794                         if (nd->nd_flag & ND_NFSV4) {
3795                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3796                                     stuff);
3797                                 if (error)
3798                                         goto nfsmout;
3799                         }
3800                 }
3801                 m_freem(nd->nd_mrep);
3802                 nd->nd_mrep = NULL;
3803         }
3804         /*
3805          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3806          * by increasing d_reclen for the last record.
3807          */
3808         if (blksiz > 0) {
3809                 left = DIRBLKSIZ - blksiz;
3810                 NFSBZERO(uiop->uio_iov->iov_base, left);
3811                 dp->d_reclen += left;
3812                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3813                     left;
3814                 uiop->uio_iov->iov_len -= left;
3815                 uiop->uio_resid -= left;
3816                 uiop->uio_offset += left;
3817         }
3818
3819         /*
3820          * If returning no data, assume end of file.
3821          * If not bigenough, return not end of file, since you aren't
3822          *    returning all the data
3823          * Otherwise, return the eof flag from the server.
3824          */
3825         if (eofp != NULL) {
3826                 if (tresid == uiop->uio_resid)
3827                         *eofp = 1;
3828                 else if (!bigenough)
3829                         *eofp = 0;
3830                 else
3831                         *eofp = eof;
3832         }
3833
3834         /*
3835          * Add extra empty records to any remaining DIRBLKSIZ chunks.
3836          */
             /* Skipped when nothing at all was placed in the buffer. */
3837         while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
3838                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3839                 NFSBZERO(dp, DIRBLKSIZ);
3840                 dp->d_type = DT_UNKNOWN;
                     /* The filler record carries the last cookie seen. */
3841                 tl = (u_int32_t *)&dp->d_name[4];
3842                 *tl++ = cookie.lval[0];
3843                 *tl = cookie.lval[1];
3844                 dp->d_reclen = DIRBLKSIZ;
3845                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3846                     DIRBLKSIZ;
3847                 uiop->uio_iov->iov_len -= DIRBLKSIZ;
3848                 uiop->uio_resid -= DIRBLKSIZ;
3849                 uiop->uio_offset += DIRBLKSIZ;
3850         }
3851
     /* Common exit: release any reply still held and return error. */
3852 nfsmout:
3853         if (nd->nd_mrep != NULL)
3854                 m_freem(nd->nd_mrep);
3855         return (error);
3856 }
3857 #endif  /* !APPLE */
3858
3859 /*
3860  * Nfs commit rpc
      *
      * Builds a Commit request for cnt bytes starting at offset on vp; for
      * NFSv4 a Getattr op is appended to the request.
      *
      * nap and attrflagp are handed to nfscl_wcc_data() and, for NFSv4, to
      * nfscl_postop_attr(); *attrflagp is cleared first.  cred, p and stuff
      * are passed through to nfscl_request() and those helpers.
      *
      * When the reply is good, its NFSX_VERF byte write verifier is compared
      * with the one saved in the mount (nm_verf).  If they differ the new
      * verifier is saved and NFSERR_STALEWRITEVERF becomes the reply status.
      *
      * Returns 0 on success, otherwise a local error or the reply status
      * (nd_repstat).
3861  */
3862 int
3863 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
3864     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
3865 {
3866         u_int32_t *tl;
3867         struct nfsrv_descript nfsd, *nd = &nfsd;
3868         nfsattrbit_t attrbits;
3869         int error;
3870         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3871
3872         *attrflagp = 0;
3873         NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp);
             /* Arguments: 64 bit offset followed by a 32 bit count. */
3874         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3875         txdr_hyper(offset, tl);
3876         tl += 2;
3877         *tl = txdr_unsigned(cnt);
3878         if (nd->nd_flag & ND_NFSV4) {
3879                 /*
3880                  * And do a Getattr op.
3881                  */
3882                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3883                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3884                 NFSGETATTR_ATTRBIT(&attrbits);
3885                 (void) nfsrv_putattrbit(nd, &attrbits);
3886         }
3887         error = nfscl_request(nd, vp, p, cred, stuff);
             /*
              * NOTE(review): returns without m_freem(); presumably no reply
              * mbuf is held when nfscl_request() fails -- confirm.
              */
3888         if (error)
3889                 return (error);
3890         error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff);
3891         if (!error && !nd->nd_repstat) {
3892                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
                     /* Compare and update the saved verifier under the mount lock. */
3893                 NFSLOCKMNT(nmp);
3894                 if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
3895                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
3896                         nd->nd_repstat = NFSERR_STALEWRITEVERF;
3897                 }
3898                 NFSUNLOCKMNT(nmp);
3899                 if (nd->nd_flag & ND_NFSV4)
3900                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3901         }
     /* A local error takes precedence over the reply status. */
3902 nfsmout:
3903         if (!error && nd->nd_repstat)
3904                 error = nd->nd_repstat;
3905         m_freem(nd->nd_mrep);
3906         return (error);
3907 }
3908
3909 /*
3910  * NFS byte range lock rpc.
3911  * (Mostly just calls one of the three lower level RPC routines.)
3912  */
3913 int
3914 nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
3915     int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
3916 {
3917         struct nfscllockowner *lp;
3918         struct nfsclclient *clp;
3919         struct nfsfh *nfhp;
3920         struct nfsrv_descript nfsd, *nd = &nfsd;
3921         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3922         u_int64_t off, len;
3923         off_t start, end;
3924         u_int32_t clidrev = 0;
3925         int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
3926         int callcnt, dorpc;
3927
3928         /*
3929          * Convert the flock structure into a start and end and do POSIX
3930          * bounds checking.
3931          */
3932         switch (fl->l_whence) {
3933         case SEEK_SET:
3934         case SEEK_CUR:
3935                 /*
3936                  * Caller is responsible for adding any necessary offset
3937                  * when SEEK_CUR is used.
3938                  */
3939                 start = fl->l_start;
3940                 off = fl->l_start;
3941                 break;
3942         case SEEK_END:
3943                 start = size + fl->l_start;
3944                 off = size + fl->l_start;
3945                 break;
3946         default:
3947                 return (EINVAL);
3948         }
3949         if (start < 0)
3950                 return (EINVAL);
3951         if (fl->l_len != 0) {
3952                 end = start + fl->l_len - 1;
3953                 if (end < start)
3954                         return (EINVAL);
3955         }
3956
3957         len = fl->l_len;
3958         if (len == 0)
3959                 len = NFS64BITSSET;
3960         retrycnt = 0;
3961         do {
3962             nd->nd_repstat = 0;
3963             if (op == F_GETLK) {
3964                 error = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
3965                 if (error)
3966                         return (error);
3967                 error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
3968                 if (!error) {
3969                         clidrev = clp->nfsc_clientidrev;
3970                         error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
3971                             p, id, flags);
3972                 } else if (error == -1) {
3973                         error = 0;
3974                 }
3975                 nfscl_clientrelease(clp);
3976             } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
3977                 /*
3978                  * We must loop around for all lockowner cases.
3979                  */
3980                 callcnt = 0;
3981                 error = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
3982                 if (error)
3983                         return (error);
3984                 do {
3985                     error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
3986                         clp, id, flags, &lp, &dorpc);
3987                     /*
3988                      * If it returns a NULL lp, we're done.
3989                      */
3990                     if (lp == NULL) {
3991                         if (callcnt == 0)
3992                             nfscl_clientrelease(clp);
3993                         else
3994                             nfscl_releasealllocks(clp, vp, p, id, flags);
3995                         return (error);
3996                     }
3997                     if (nmp->nm_clp != NULL)
3998                         clidrev = nmp->nm_clp->nfsc_clientidrev;
3999                     else
4000                         clidrev = 0;
4001                     /*
4002                      * If the server doesn't support Posix lock semantics,
4003                      * only allow locks on the entire file, since it won't
4004                      * handle overlapping byte ranges.
4005                      * There might still be a problem when a lock
4006                      * upgrade/downgrade (read<->write) occurs, since the
4007                      * server "might" expect an unlock first?
4008                      */
4009                     if (dorpc && (lp->nfsl_open->nfso_posixlock ||
4010                         (off == 0 && len == NFS64BITSSET))) {
4011                         /*
4012                          * Since the lock records will go away, we must
4013                          * wait for grace and delay here.
4014                          */
4015                         do {
4016                             error = nfsrpc_locku(nd, nmp, lp, off, len,
4017                                 NFSV4LOCKT_READ, cred, p, 0);
4018                             if ((nd->nd_repstat == NFSERR_GRACE ||
4019                                  nd->nd_repstat == NFSERR_DELAY) &&
4020                                 error == 0)
4021                                 (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4022                                     "nfs_advlock");
4023                         } while ((nd->nd_repstat == NFSERR_GRACE ||
4024                             nd->nd_repstat == NFSERR_DELAY) && error == 0);
4025                     }
4026                     callcnt++;
4027                 } while (error == 0 && nd->nd_repstat == 0);
4028                 nfscl_releasealllocks(clp, vp, p, id, flags);
4029             } else if (op == F_SETLK) {
4030                 error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
4031                     NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
4032                 if (error || donelocally) {
4033                         return (error);
4034                 }
4035                 if (nmp->nm_clp != NULL)
4036                         clidrev = nmp->nm_clp->nfsc_clientidrev;
4037                 else
4038                         clidrev = 0;
4039                 nfhp = VTONFS(vp)->n_fhp;
4040                 if (!lp->nfsl_open->nfso_posixlock &&
4041                     (off != 0 || len != NFS64BITSSET)) {
4042                         error = EINVAL;
4043                 } else {
4044                         error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
4045                             nfhp->nfh_len, lp, newone, reclaim, off,
4046                             len, fl->l_type, cred, p, 0);
4047                 }
4048                 if (!error)
4049                         error = nd->nd_repstat;
4050                 nfscl_lockrelease(lp, error, newone);
4051             } else {
4052                 error = EINVAL;
4053             }
4054             if (!error)
4055                 error = nd->nd_repstat;
4056             if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
4057                 error == NFSERR_STALEDONTRECOVER ||
4058                 error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4059                 error == NFSERR_BADSESSION) {
4060                 (void) nfs_catnap(PZERO, error, "nfs_advlock");
4061             } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
4062                 && clidrev != 0) {
4063                 expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
4064                 retrycnt++;
4065             }
4066         } while (error == NFSERR_GRACE ||
4067             error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4068             error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
4069             error == NFSERR_BADSESSION ||
4070             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
4071              expireret == 0 && clidrev != 0 && retrycnt < 4));
4072         if (error && retrycnt >= 4)
4073                 error = EIO;
4074         return (error);
4075 }
4076
4077 /*
4078  * The lower level routine for the LockT case.
4079  */
4080 int
4081 nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
4082     struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
4083     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4084 {
4085         u_int32_t *tl;
4086         int error, type, size;
4087         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4088         struct nfsnode *np;
4089         struct nfsmount *nmp;
4090         struct nfsclsession *tsep;
4091
4092         nmp = VFSTONFS(vp->v_mount);
4093         NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp);
4094         NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4095         if (fl->l_type == F_RDLCK)
4096                 *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4097         else
4098                 *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4099         txdr_hyper(off, tl);
4100         tl += 2;
4101         txdr_hyper(len, tl);
4102         tl += 2;
4103         tsep = nfsmnt_mdssession(nmp);
4104         *tl++ = tsep->nfsess_clientid.lval[0];
4105         *tl = tsep->nfsess_clientid.lval[1];
4106         nfscl_filllockowner(id, own, flags);
4107         np = VTONFS(vp);
4108         NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
4109             np->n_fhp->nfh_len);
4110         (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
4111         error = nfscl_request(nd, vp, p, cred, NULL);
4112         if (error)
4113                 return (error);
4114         if (nd->nd_repstat == 0) {
4115                 fl->l_type = F_UNLCK;
4116         } else if (nd->nd_repstat == NFSERR_DENIED) {
4117                 nd->nd_repstat = 0;
4118                 fl->l_whence = SEEK_SET;
4119                 NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4120                 fl->l_start = fxdr_hyper(tl);
4121                 tl += 2;
4122                 len = fxdr_hyper(tl);
4123                 tl += 2;
4124                 if (len == NFS64BITSSET)
4125                         fl->l_len = 0;
4126                 else
4127                         fl->l_len = len;
4128                 type = fxdr_unsigned(int, *tl++);
4129                 if (type == NFSV4LOCKT_WRITE)
4130                         fl->l_type = F_WRLCK;
4131                 else
4132                         fl->l_type = F_RDLCK;
4133                 /*
4134                  * XXX For now, I have no idea what to do with the
4135                  * conflicting lock_owner, so I'll just set the pid == 0
4136                  * and skip over the lock_owner.
4137                  */
4138                 fl->l_pid = (pid_t)0;
4139                 tl += 2;
4140                 size = fxdr_unsigned(int, *tl);
4141                 if (size < 0 || size > NFSV4_OPAQUELIMIT)
4142                         error = EBADRPC;
4143                 if (!error)
4144                         error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4145         } else if (nd->nd_repstat == NFSERR_STALECLIENTID)
4146                 nfscl_initiate_recovery(clp);
4147 nfsmout:
4148         m_freem(nd->nd_mrep);
4149         return (error);
4150 }
4151
4152 /*
4153  * Lower level function that performs the LockU RPC.
4154  */
4155 static int
4156 nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
4157     struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
4158     u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
4159 {
4160         u_int32_t *tl;
4161         int error;
4162
4163         nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
4164             lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0);
4165         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
4166         *tl++ = txdr_unsigned(type);
4167         *tl = txdr_unsigned(lp->nfsl_seqid);
4168         if (nfstest_outofseq &&
4169             (arc4random() % nfstest_outofseq) == 0)
4170                 *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4171         tl++;
4172         if (NFSHASNFSV4N(nmp))
4173                 *tl++ = 0;
4174         else
4175                 *tl++ = lp->nfsl_stateid.seqid;
4176         *tl++ = lp->nfsl_stateid.other[0];
4177         *tl++ = lp->nfsl_stateid.other[1];
4178         *tl++ = lp->nfsl_stateid.other[2];
4179         txdr_hyper(off, tl);
4180         tl += 2;
4181         txdr_hyper(len, tl);
4182         if (syscred)
4183                 nd->nd_flag |= ND_USEGSSNAME;
4184         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4185             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4186         NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4187         if (error)
4188                 return (error);
4189         if (nd->nd_repstat == 0) {
4190                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4191                 lp->nfsl_stateid.seqid = *tl++;
4192                 lp->nfsl_stateid.other[0] = *tl++;
4193                 lp->nfsl_stateid.other[1] = *tl++;
4194                 lp->nfsl_stateid.other[2] = *tl;
4195         } else if (nd->nd_repstat == NFSERR_STALESTATEID)
4196                 nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4197 nfsmout:
4198         m_freem(nd->nd_mrep);
4199         return (error);
4200 }
4201
4202 /*
4203  * The actual Lock RPC.
4204  */
4205 int
4206 nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
4207     u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
4208     int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
4209     NFSPROC_T *p, int syscred)
4210 {
4211         u_int32_t *tl;
4212         int error, size;
4213         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4214         struct nfsclsession *tsep;
4215
4216         nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
4217         NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4218         if (type == F_RDLCK)
4219                 *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4220         else
4221                 *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4222         *tl++ = txdr_unsigned(reclaim);
4223         txdr_hyper(off, tl);
4224         tl += 2;
4225         txdr_hyper(len, tl);
4226         tl += 2;
4227         if (newone) {
4228             *tl = newnfs_true;
4229             NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
4230                 2 * NFSX_UNSIGNED + NFSX_HYPER);
4231             *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
4232             if (NFSHASNFSV4N(nmp))
4233                 *tl++ = 0;
4234             else
4235                 *tl++ = lp->nfsl_open->nfso_stateid.seqid;
4236             *tl++ = lp->nfsl_open->nfso_stateid.other[0];
4237             *tl++ = lp->nfsl_open->nfso_stateid.other[1];
4238             *tl++ = lp->nfsl_open->nfso_stateid.other[2];
4239             *tl++ = txdr_unsigned(lp->nfsl_seqid);
4240             tsep = nfsmnt_mdssession(nmp);
4241             *tl++ = tsep->nfsess_clientid.lval[0];
4242             *tl = tsep->nfsess_clientid.lval[1];
4243             NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4244             NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4245             (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4246         } else {
4247             *tl = newnfs_false;
4248             NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
4249             if (NFSHASNFSV4N(nmp))
4250                 *tl++ = 0;
4251             else
4252                 *tl++ = lp->nfsl_stateid.seqid;
4253             *tl++ = lp->nfsl_stateid.other[0];
4254             *tl++ = lp->nfsl_stateid.other[1];
4255             *tl++ = lp->nfsl_stateid.other[2];
4256             *tl = txdr_unsigned(lp->nfsl_seqid);
4257             if (nfstest_outofseq &&
4258                 (arc4random() % nfstest_outofseq) == 0)
4259                     *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4260         }
4261         if (syscred)
4262                 nd->nd_flag |= ND_USEGSSNAME;
4263         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4264             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4265         if (error)
4266                 return (error);
4267         if (newone)
4268             NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4269         NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4270         if (nd->nd_repstat == 0) {
4271                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4272                 lp->nfsl_stateid.seqid = *tl++;
4273                 lp->nfsl_stateid.other[0] = *tl++;
4274                 lp->nfsl_stateid.other[1] = *tl++;
4275                 lp->nfsl_stateid.other[2] = *tl;
4276         } else if (nd->nd_repstat == NFSERR_DENIED) {
4277                 NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4278                 size = fxdr_unsigned(int, *(tl + 7));
4279                 if (size < 0 || size > NFSV4_OPAQUELIMIT)
4280                         error = EBADRPC;
4281                 if (!error)
4282                         error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4283         } else if (nd->nd_repstat == NFSERR_STALESTATEID)
4284                 nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4285 nfsmout:
4286         m_freem(nd->nd_mrep);
4287         return (error);
4288 }
4289
4290 /*
4291  * nfs statfs rpc
4292  * (always called with the vp for the mount point)
4293  */
4294 int
4295 nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4296     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4297     void *stuff)
4298 {
4299         u_int32_t *tl = NULL;
4300         struct nfsrv_descript nfsd, *nd = &nfsd;
4301         struct nfsmount *nmp;
4302         nfsattrbit_t attrbits;
4303         int error;
4304
4305         *attrflagp = 0;
4306         nmp = VFSTONFS(vp->v_mount);
4307         if (NFSHASNFSV4(nmp)) {
4308                 /*
4309                  * For V4, you actually do a getattr.
4310                  */
4311                 NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4312                 NFSSTATFS_GETATTRBIT(&attrbits);
4313                 (void) nfsrv_putattrbit(nd, &attrbits);
4314                 nd->nd_flag |= ND_USEGSSNAME;
4315                 error = nfscl_request(nd, vp, p, cred, stuff);
4316                 if (error)
4317                         return (error);
4318                 if (nd->nd_repstat == 0) {
4319                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4320                             NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p,
4321                             cred);
4322                         if (!error) {
4323                                 nmp->nm_fsid[0] = nap->na_filesid[0];
4324                                 nmp->nm_fsid[1] = nap->na_filesid[1];
4325                                 NFSSETHASSETFSID(nmp);
4326                                 *attrflagp = 1;
4327                         }
4328                 } else {
4329                         error = nd->nd_repstat;
4330                 }
4331                 if (error)
4332                         goto nfsmout;
4333         } else {
4334                 NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp);
4335                 error = nfscl_request(nd, vp, p, cred, stuff);
4336                 if (error)
4337                         return (error);
4338                 if (nd->nd_flag & ND_NFSV3) {
4339                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4340                         if (error)
4341                                 goto nfsmout;
4342                 }
4343                 if (nd->nd_repstat) {
4344                         error = nd->nd_repstat;
4345                         goto nfsmout;
4346                 }
4347                 NFSM_DISSECT(tl, u_int32_t *,
4348                     NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4349         }
4350         if (NFSHASNFSV3(nmp)) {
4351                 sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4352                 sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4353                 sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4354                 sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4355                 sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4356                 sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4357                 sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4358         } else if (NFSHASNFSV4(nmp) == 0) {
4359                 sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4360                 sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4361                 sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4362                 sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4363                 sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4364         }
4365 nfsmout:
4366         m_freem(nd->nd_mrep);
4367         return (error);
4368 }
4369
4370 /*
4371  * nfs pathconf rpc
4372  */
4373 int
4374 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4375     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4376     void *stuff)
4377 {
4378         struct nfsrv_descript nfsd, *nd = &nfsd;
4379         struct nfsmount *nmp;
4380         u_int32_t *tl;
4381         nfsattrbit_t attrbits;
4382         int error;
4383
4384         *attrflagp = 0;
4385         nmp = VFSTONFS(vp->v_mount);
4386         if (NFSHASNFSV4(nmp)) {
4387                 /*
4388                  * For V4, you actually do a getattr.
4389                  */
4390                 NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4391                 NFSPATHCONF_GETATTRBIT(&attrbits);
4392                 (void) nfsrv_putattrbit(nd, &attrbits);
4393                 nd->nd_flag |= ND_USEGSSNAME;
4394                 error = nfscl_request(nd, vp, p, cred, stuff);
4395                 if (error)
4396                         return (error);
4397                 if (nd->nd_repstat == 0) {
4398                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4399                             pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4400                             cred);
4401                         if (!error)
4402                                 *attrflagp = 1;
4403                 } else {
4404                         error = nd->nd_repstat;
4405                 }
4406         } else {
4407                 NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp);
4408                 error = nfscl_request(nd, vp, p, cred, stuff);
4409                 if (error)
4410                         return (error);
4411                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4412                 if (nd->nd_repstat && !error)
4413                         error = nd->nd_repstat;
4414                 if (!error) {
4415                         NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4416                         pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4417                         pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4418                         pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4419                         pc->pc_chownrestricted =
4420                             fxdr_unsigned(u_int32_t, *tl++);
4421                         pc->pc_caseinsensitive =
4422                             fxdr_unsigned(u_int32_t, *tl++);
4423                         pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4424                 }
4425         }
4426 nfsmout:
4427         m_freem(nd->nd_mrep);
4428         return (error);
4429 }
4430
4431 /*
4432  * nfs version 3 fsinfo rpc call
4433  */
4434 int
4435 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4436     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
4437 {
4438         u_int32_t *tl;
4439         struct nfsrv_descript nfsd, *nd = &nfsd;
4440         int error;
4441
4442         *attrflagp = 0;
4443         NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp);
4444         error = nfscl_request(nd, vp, p, cred, stuff);
4445         if (error)
4446                 return (error);
4447         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4448         if (nd->nd_repstat && !error)
4449                 error = nd->nd_repstat;
4450         if (!error) {
4451                 NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4452                 fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4453                 fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4454                 fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4455                 fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4456                 fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4457                 fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4458                 fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4459                 fsp->fs_maxfilesize = fxdr_hyper(tl);
4460                 tl += 2;
4461                 fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4462                 tl += 2;
4463                 fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4464         }
4465 nfsmout:
4466         m_freem(nd->nd_mrep);
4467         return (error);
4468 }
4469
4470 /*
4471  * This function performs the Renew RPC.
4472  */
4473 int
4474 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4475     NFSPROC_T *p)
4476 {
4477         u_int32_t *tl;
4478         struct nfsrv_descript nfsd;
4479         struct nfsrv_descript *nd = &nfsd;
4480         struct nfsmount *nmp;
4481         int error;
4482         struct nfssockreq *nrp;
4483         struct nfsclsession *tsep;
4484
4485         nmp = clp->nfsc_nmp;
4486         if (nmp == NULL)
4487                 return (0);
4488         if (dsp == NULL)
4489                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
4490                     0);
4491         else
4492                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4493                     &dsp->nfsclds_sess, 0, 0);
4494         if (!NFSHASNFSV4N(nmp)) {
4495                 /* NFSv4.1 just uses a Sequence Op and not a Renew. */
4496                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4497                 tsep = nfsmnt_mdssession(nmp);
4498                 *tl++ = tsep->nfsess_clientid.lval[0];
4499                 *tl = tsep->nfsess_clientid.lval[1];
4500         }
4501         nrp = NULL;
4502         if (dsp != NULL)
4503                 nrp = dsp->nfsclds_sockp;
4504         if (nrp == NULL)
4505                 /* If NULL, use the MDS socket. */
4506                 nrp = &nmp->nm_sockreq;
4507         nd->nd_flag |= ND_USEGSSNAME;
4508         if (dsp == NULL)
4509                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4510                     NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4511         else {
4512                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4513                     NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4514                 if (error == ENXIO)
4515                         nfscl_cancelreqs(dsp);
4516         }
4517         if (error)
4518                 return (error);
4519         error = nd->nd_repstat;
4520         m_freem(nd->nd_mrep);
4521         return (error);
4522 }
4523
4524 /*
4525  * This function performs the Releaselockowner RPC.
4526  */
4527 int
4528 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4529     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4530 {
4531         struct nfsrv_descript nfsd, *nd = &nfsd;
4532         u_int32_t *tl;
4533         int error;
4534         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4535         struct nfsclsession *tsep;
4536
4537         if (NFSHASNFSV4N(nmp)) {
4538                 /* For NFSv4.1, do a FreeStateID. */
4539                 nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4540                     NULL, 0, 0);
4541                 nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4542         } else {
4543                 nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4544                     NULL, 0, 0);
4545                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4546                 tsep = nfsmnt_mdssession(nmp);
4547                 *tl++ = tsep->nfsess_clientid.lval[0];
4548                 *tl = tsep->nfsess_clientid.lval[1];
4549                 NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4550                 NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4551                 (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4552         }
4553         nd->nd_flag |= ND_USEGSSNAME;
4554         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4555             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4556         if (error)
4557                 return (error);
4558         error = nd->nd_repstat;
4559         m_freem(nd->nd_mrep);
4560         return (error);
4561 }
4562
4563 /*
4564  * This function performs the Compound to get the mount pt FH.
4565  */
4566 int
4567 nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
4568     NFSPROC_T *p)
4569 {
4570         u_int32_t *tl;
4571         struct nfsrv_descript nfsd;
4572         struct nfsrv_descript *nd = &nfsd;
4573         u_char *cp, *cp2;
4574         int error, cnt, len, setnil;
4575         u_int32_t *opcntp;
4576
4577         nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
4578             0);
4579         cp = dirpath;
4580         cnt = 0;
4581         do {
4582                 setnil = 0;
4583                 while (*cp == '/')
4584                         cp++;
4585                 cp2 = cp;
4586                 while (*cp2 != '\0' && *cp2 != '/')
4587                         cp2++;
4588                 if (*cp2 == '/') {
4589                         setnil = 1;
4590                         *cp2 = '\0';
4591                 }
4592                 if (cp2 != cp) {
4593                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4594                         *tl = txdr_unsigned(NFSV4OP_LOOKUP);
4595                         nfsm_strtom(nd, cp, strlen(cp));
4596                         cnt++;
4597                 }
4598                 if (setnil)
4599                         *cp2++ = '/';
4600                 cp = cp2;
4601         } while (*cp != '\0');
4602         if (NFSHASNFSV4N(nmp))
4603                 /* Has a Sequence Op done by nfscl_reqstart(). */
4604                 *opcntp = txdr_unsigned(3 + cnt);
4605         else
4606                 *opcntp = txdr_unsigned(2 + cnt);
4607         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4608         *tl = txdr_unsigned(NFSV4OP_GETFH);
4609         nd->nd_flag |= ND_USEGSSNAME;
4610         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4611                 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4612         if (error)
4613                 return (error);
4614         if (nd->nd_repstat == 0) {
4615                 NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
4616                 tl += (2 + 2 * cnt);
4617                 if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
4618                         len > NFSX_FHMAX) {
4619                         nd->nd_repstat = NFSERR_BADXDR;
4620                 } else {
4621                         nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len);
4622                         if (nd->nd_repstat == 0)
4623                                 nmp->nm_fhsize = len;
4624                 }
4625         }
4626         error = nd->nd_repstat;
4627 nfsmout:
4628         m_freem(nd->nd_mrep);
4629         return (error);
4630 }
4631
4632 /*
4633  * This function performs the Delegreturn RPC.
4634  */
4635 int
4636 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
4637     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
4638 {
4639         u_int32_t *tl;
4640         struct nfsrv_descript nfsd;
4641         struct nfsrv_descript *nd = &nfsd;
4642         int error;
4643
4644         nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
4645             dp->nfsdl_fhlen, NULL, NULL, 0, 0);
4646         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
4647         if (NFSHASNFSV4N(nmp))
4648                 *tl++ = 0;
4649         else
4650                 *tl++ = dp->nfsdl_stateid.seqid;
4651         *tl++ = dp->nfsdl_stateid.other[0];
4652         *tl++ = dp->nfsdl_stateid.other[1];
4653         *tl = dp->nfsdl_stateid.other[2];
4654         if (syscred)
4655                 nd->nd_flag |= ND_USEGSSNAME;
4656         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4657             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4658         if (error)
4659                 return (error);
4660         error = nd->nd_repstat;
4661         m_freem(nd->nd_mrep);
4662         return (error);
4663 }
4664
4665 /*
4666  * nfs getacl call.
4667  */
4668 int
4669 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4670     struct acl *aclp, void *stuff)
4671 {
4672         struct nfsrv_descript nfsd, *nd = &nfsd;
4673         int error;
4674         nfsattrbit_t attrbits;
4675         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4676
4677         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4678                 return (EOPNOTSUPP);
4679         NFSCL_REQSTART(nd, NFSPROC_GETACL, vp);
4680         NFSZERO_ATTRBIT(&attrbits);
4681         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4682         (void) nfsrv_putattrbit(nd, &attrbits);
4683         error = nfscl_request(nd, vp, p, cred, stuff);
4684         if (error)
4685                 return (error);
4686         if (!nd->nd_repstat)
4687                 error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
4688                     NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
4689         else
4690                 error = nd->nd_repstat;
4691         m_freem(nd->nd_mrep);
4692         return (error);
4693 }
4694
4695 /*
4696  * nfs setacl call.
4697  */
4698 int
4699 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4700     struct acl *aclp, void *stuff)
4701 {
4702         int error;
4703         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4704
4705         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4706                 return (EOPNOTSUPP);
4707         error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff);
4708         return (error);
4709 }
4710
4711 /*
4712  * nfs setacl call.
4713  */
4714 static int
4715 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4716     struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff)
4717 {
4718         struct nfsrv_descript nfsd, *nd = &nfsd;
4719         int error;
4720         nfsattrbit_t attrbits;
4721         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4722
4723         if (!NFSHASNFSV4(nmp))
4724                 return (EOPNOTSUPP);
4725         NFSCL_REQSTART(nd, NFSPROC_SETACL, vp);
4726         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
4727         NFSZERO_ATTRBIT(&attrbits);
4728         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4729         (void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0,
4730             &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
4731         error = nfscl_request(nd, vp, p, cred, stuff);
4732         if (error)
4733                 return (error);
4734         /* Don't care about the pre/postop attributes */
4735         m_freem(nd->nd_mrep);
4736         return (nd->nd_repstat);
4737 }
4738
4739 /*
4740  * Do the NFSv4.1 Exchange ID.
4741  */
4742 int
4743 nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
4744     struct nfssockreq *nrp, int minorvers, uint32_t exchflags,
4745     struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p)
4746 {
4747         uint32_t *tl, v41flags;
4748         struct nfsrv_descript nfsd;
4749         struct nfsrv_descript *nd = &nfsd;
4750         struct nfsclds *dsp;
4751         struct timespec verstime;
4752         int error, len;
4753
4754         *dspp = NULL;
4755         if (minorvers == 0)
4756                 minorvers = nmp->nm_minorvers;
4757         nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL,
4758             NFS_VER4, minorvers);
4759         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4760         *tl++ = txdr_unsigned(nfsboottime.tv_sec);      /* Client owner */
4761         *tl = txdr_unsigned(clp->nfsc_rev);
4762         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
4763
4764         NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
4765         *tl++ = txdr_unsigned(exchflags);
4766         *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
4767
4768         /* Set the implementation id4 */
4769         *tl = txdr_unsigned(1);
4770         (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
4771         (void) nfsm_strtom(nd, version, strlen(version));
4772         NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
4773         verstime.tv_sec = 1293840000;           /* Jan 1, 2011 */
4774         verstime.tv_nsec = 0;
4775         txdr_nfsv4time(&verstime, tl);
4776         nd->nd_flag |= ND_USEGSSNAME;
4777         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4778             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4779         NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
4780             (int)nd->nd_repstat);
4781         if (error != 0)
4782                 return (error);
4783         if (nd->nd_repstat == 0) {
4784                 NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
4785                 len = fxdr_unsigned(int, *(tl + 7));
4786                 if (len < 0 || len > NFSV4_OPAQUELIMIT) {
4787                         error = NFSERR_BADXDR;
4788                         goto nfsmout;
4789                 }
4790                 dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
4791                     M_WAITOK | M_ZERO);
4792                 dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
4793                 dsp->nfsclds_servownlen = len;
4794                 dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
4795                 dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
4796                 dsp->nfsclds_sess.nfsess_sequenceid =
4797                     fxdr_unsigned(uint32_t, *tl++);
4798                 v41flags = fxdr_unsigned(uint32_t, *tl);
4799                 if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
4800                     NFSHASPNFSOPT(nmp)) {
4801                         NFSCL_DEBUG(1, "set PNFS\n");
4802                         NFSLOCKMNT(nmp);
4803                         nmp->nm_state |= NFSSTA_PNFS;
4804                         NFSUNLOCKMNT(nmp);
4805                         dsp->nfsclds_flags |= NFSCLDS_MDS;
4806                 }
4807                 if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
4808                         dsp->nfsclds_flags |= NFSCLDS_DS;
4809                 if (minorvers == NFSV42_MINORVERSION)
4810                         dsp->nfsclds_flags |= NFSCLDS_MINORV2;
4811                 if (len > 0)
4812                         nd->nd_repstat = nfsrv_mtostr(nd,
4813                             dsp->nfsclds_serverown, len);
4814                 if (nd->nd_repstat == 0) {
4815                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
4816                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
4817                             NULL, MTX_DEF);
4818                         nfscl_initsessionslots(&dsp->nfsclds_sess);
4819                         *dspp = dsp;
4820                 } else
4821                         free(dsp, M_NFSCLDS);
4822         }
4823         error = nd->nd_repstat;
4824 nfsmout:
4825         m_freem(nd->nd_mrep);
4826         return (error);
4827 }
4828
4829 /*
4830  * Do the NFSv4.1 Create Session.
4831  */
4832 int
4833 nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
4834     struct nfssockreq *nrp, struct nfsclds *dsp, uint32_t sequenceid, int mds,
4835     struct ucred *cred, NFSPROC_T *p)
4836 {
4837         uint32_t crflags, maxval, *tl;
4838         struct nfsrv_descript nfsd;
4839         struct nfsrv_descript *nd = &nfsd;
4840         int error, irdcnt, minorvers;
4841
4842         /* Make sure nm_rsize, nm_wsize is set. */
4843         if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
4844                 nmp->nm_rsize = NFS_MAXBSIZE;
4845         if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
4846                 nmp->nm_wsize = NFS_MAXBSIZE;
4847         if (dsp == NULL)
4848                 minorvers = nmp->nm_minorvers;
4849         else if ((dsp->nfsclds_flags & NFSCLDS_MINORV2) != 0)
4850                 minorvers = NFSV42_MINORVERSION;
4851         else
4852                 minorvers = NFSV41_MINORVERSION;
4853         nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL,
4854             NFS_VER4, minorvers);
4855         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4856         *tl++ = sep->nfsess_clientid.lval[0];
4857         *tl++ = sep->nfsess_clientid.lval[1];
4858         *tl++ = txdr_unsigned(sequenceid);
4859         crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
4860         if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
4861                 crflags |= NFSV4CRSESS_CONNBACKCHAN;
4862         *tl = txdr_unsigned(crflags);
4863
4864         /* Fill in fore channel attributes. */
4865         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4866         *tl++ = 0;                              /* Header pad size */
4867         if ((nd->nd_flag & ND_NFSV42) != 0 && mds != 0 && sb_max_adj >=
4868             nmp->nm_wsize && sb_max_adj >= nmp->nm_rsize) {
4869                 /*
4870                  * NFSv4.2 Extended Attribute operations may want to do
4871                  * requests/replies that are larger than nm_rsize/nm_wsize.
4872                  */
4873                 *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
4874                 *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
4875         } else {
4876                 *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);
4877                 *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);
4878         }
4879         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
4880         *tl++ = txdr_unsigned(20);              /* Max operations */
4881         *tl++ = txdr_unsigned(64);              /* Max slots */
4882         *tl = 0;                                /* No rdma ird */
4883
4884         /* Fill in back channel attributes. */
4885         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4886         *tl++ = 0;                              /* Header pad size */
4887         *tl++ = txdr_unsigned(10000);           /* Max request size */
4888         *tl++ = txdr_unsigned(10000);           /* Max response size */
4889         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
4890         *tl++ = txdr_unsigned(4);               /* Max operations */
4891         *tl++ = txdr_unsigned(NFSV4_CBSLOTS);   /* Max slots */
4892         *tl = 0;                                /* No rdma ird */
4893
4894         NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
4895         *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */
4896
4897         /* Allow AUTH_SYS callbacks as uid, gid == 0. */
4898         *tl++ = txdr_unsigned(1);               /* Auth_sys only */
4899         *tl++ = txdr_unsigned(AUTH_SYS);        /* AUTH_SYS type */
4900         *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
4901         *tl++ = 0;                              /* Null machine name */
4902         *tl++ = 0;                              /* Uid == 0 */
4903         *tl++ = 0;                              /* Gid == 0 */
4904         *tl = 0;                                /* No additional gids */
4905         nd->nd_flag |= ND_USEGSSNAME;
4906         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
4907             NFS_VER4, NULL, 1, NULL, NULL);
4908         if (error != 0)
4909                 return (error);
4910         if (nd->nd_repstat == 0) {
4911                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
4912                     2 * NFSX_UNSIGNED);
4913                 bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
4914                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
4915                 sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
4916                 crflags = fxdr_unsigned(uint32_t, *tl);
4917                 if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
4918                         NFSLOCKMNT(nmp);
4919                         nmp->nm_state |= NFSSTA_SESSPERSIST;
4920                         NFSUNLOCKMNT(nmp);
4921                 }
4922
4923                 /* Get the fore channel slot count. */
4924                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4925                 tl++;                   /* Skip the header pad size. */
4926
4927                 /* Make sure nm_wsize is small enough. */
4928                 maxval = fxdr_unsigned(uint32_t, *tl++);
4929                 while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
4930                         if (nmp->nm_wsize > 8096)
4931                                 nmp->nm_wsize /= 2;
4932                         else
4933                                 break;
4934                 }
4935                 sep->nfsess_maxreq = maxval;
4936
4937                 /* Make sure nm_rsize is small enough. */
4938                 maxval = fxdr_unsigned(uint32_t, *tl++);
4939                 while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
4940                         if (nmp->nm_rsize > 8096)
4941                                 nmp->nm_rsize /= 2;
4942                         else
4943                                 break;
4944                 }
4945                 sep->nfsess_maxresp = maxval;
4946
4947                 sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
4948                 tl++;
4949                 sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
4950                 NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
4951                 irdcnt = fxdr_unsigned(int, *tl);
4952                 if (irdcnt > 0)
4953                         NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
4954
4955                 /* and the back channel slot count. */
4956                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4957                 tl += 5;
4958                 sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
4959                 NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
4960         }
4961         error = nd->nd_repstat;
4962 nfsmout:
4963         m_freem(nd->nd_mrep);
4964         return (error);
4965 }
4966
4967 /*
4968  * Do the NFSv4.1 Destroy Session.
4969  */
4970 int
4971 nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp,
4972     struct ucred *cred, NFSPROC_T *p)
4973 {
4974         uint32_t *tl;
4975         struct nfsrv_descript nfsd;
4976         struct nfsrv_descript *nd = &nfsd;
4977         int error;
4978         struct nfsclsession *tsep;
4979
4980         nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL, 0,
4981             0);
4982         NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
4983         tsep = nfsmnt_mdssession(nmp);
4984         bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID);
4985         nd->nd_flag |= ND_USEGSSNAME;
4986         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4987             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4988         if (error != 0)
4989                 return (error);
4990         error = nd->nd_repstat;
4991         m_freem(nd->nd_mrep);
4992         return (error);
4993 }
4994
4995 /*
4996  * Do the NFSv4.1 Destroy Client.
4997  */
4998 int
4999 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
5000     struct ucred *cred, NFSPROC_T *p)
5001 {
5002         uint32_t *tl;
5003         struct nfsrv_descript nfsd;
5004         struct nfsrv_descript *nd = &nfsd;
5005         int error;
5006         struct nfsclsession *tsep;
5007
5008         nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0,
5009             0);
5010         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5011         tsep = nfsmnt_mdssession(nmp);
5012         *tl++ = tsep->nfsess_clientid.lval[0];
5013         *tl = tsep->nfsess_clientid.lval[1];
5014         nd->nd_flag |= ND_USEGSSNAME;
5015         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5016             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5017         if (error != 0)
5018                 return (error);
5019         error = nd->nd_repstat;
5020         m_freem(nd->nd_mrep);
5021         return (error);
5022 }
5023
5024 /*
5025  * Do the NFSv4.1 LayoutGet.
5026  */
5027 static int
5028 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
5029     uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
5030     int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
5031     struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p,
5032     void *stuff)
5033 {
5034         struct nfsrv_descript nfsd, *nd = &nfsd;
5035         int error;
5036
5037         nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
5038             0);
5039         nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
5040             layouttype, layoutlen, 0);
5041         nd->nd_flag |= ND_USEGSSNAME;
5042         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5043             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5044         NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
5045         if (error != 0)
5046                 return (error);
5047         if (nd->nd_repstat == 0)
5048                 error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep,
5049                     flhp);
5050         if (error == 0 && nd->nd_repstat != 0)
5051                 error = nd->nd_repstat;
5052         m_freem(nd->nd_mrep);
5053         return (error);
5054 }
5055
5056 /*
5057  * Do the NFSv4.1 Get Device Info.
5058  */
5059 int
5060 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
5061     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
5062     NFSPROC_T *p)
5063 {
5064         uint32_t cnt, *tl, vers, minorvers;
5065         struct nfsrv_descript nfsd;
5066         struct nfsrv_descript *nd = &nfsd;
5067         struct sockaddr_in sin, ssin;
5068         struct sockaddr_in6 sin6, ssin6;
5069         struct nfsclds *dsp = NULL, **dspp, **gotdspp;
5070         struct nfscldevinfo *ndi;
5071         int addrcnt = 0, bitcnt, error, gotminor, gotvers, i, isudp, j;
5072         int stripecnt;
5073         uint8_t stripeindex;
5074         sa_family_t af, safilled;
5075
5076         ssin.sin_port = 0;              /* To shut up compiler. */
5077         ssin.sin_addr.s_addr = 0;       /* ditto */
5078         *ndip = NULL;
5079         ndi = NULL;
5080         gotdspp = NULL;
5081         nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0,
5082             0);
5083         NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5084         NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
5085         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5086         *tl++ = txdr_unsigned(layouttype);
5087         *tl++ = txdr_unsigned(100000);
5088         if (notifybitsp != NULL && *notifybitsp != 0) {
5089                 *tl = txdr_unsigned(1);         /* One word of bits. */
5090                 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5091                 *tl = txdr_unsigned(*notifybitsp);
5092         } else
5093                 *tl = txdr_unsigned(0);
5094         nd->nd_flag |= ND_USEGSSNAME;
5095         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5096             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5097         if (error != 0)
5098                 return (error);
5099         if (nd->nd_repstat == 0) {
5100                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5101                 if (layouttype != fxdr_unsigned(int, *tl))
5102                         printf("EEK! devinfo layout type not same!\n");
5103                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
5104                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5105                         stripecnt = fxdr_unsigned(int, *tl);
5106                         NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
5107                         if (stripecnt < 1 || stripecnt > 4096) {
5108                                 printf("pNFS File layout devinfo stripecnt %d:"
5109                                     " out of range\n", stripecnt);
5110                                 error = NFSERR_BADXDR;
5111                                 goto nfsmout;
5112                         }
5113                         NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
5114                             NFSX_UNSIGNED);
5115                         addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
5116                         NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
5117                         if (addrcnt < 1 || addrcnt > 128) {
5118                                 printf("NFS devinfo addrcnt %d: out of range\n",
5119                                     addrcnt);
5120                                 error = NFSERR_BADXDR;
5121                                 goto nfsmout;
5122                         }
5123
5124                         /*
5125                          * Now we know how many stripe indices and addresses, so
5126                          * we can allocate the structure the correct size.
5127                          */
5128                         i = (stripecnt * sizeof(uint8_t)) /
5129                             sizeof(struct nfsclds *) + 1;
5130                         NFSCL_DEBUG(4, "stripeindices=%d\n", i);
5131                         ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
5132                             sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
5133                             M_ZERO);
5134                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5135                             NFSX_V4DEVICEID);
5136                         ndi->nfsdi_refcnt = 0;
5137                         ndi->nfsdi_flags = NFSDI_FILELAYOUT;
5138                         ndi->nfsdi_stripecnt = stripecnt;
5139                         ndi->nfsdi_addrcnt = addrcnt;
5140                         /* Fill in the stripe indices. */
5141                         for (i = 0; i < stripecnt; i++) {
5142                                 stripeindex = fxdr_unsigned(uint8_t, *tl++);
5143                                 NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
5144                                 if (stripeindex >= addrcnt) {
5145                                         printf("pNFS File Layout devinfo"
5146                                             " stripeindex %d: too big\n",
5147                                             (int)stripeindex);
5148                                         error = NFSERR_BADXDR;
5149                                         goto nfsmout;
5150                                 }
5151                                 nfsfldi_setstripeindex(ndi, i, stripeindex);
5152                         }
5153                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
5154                         /* For Flex File, we only get one address list. */
5155                         ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
5156                             M_NFSDEVINFO, M_WAITOK | M_ZERO);
5157                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5158                             NFSX_V4DEVICEID);
5159                         ndi->nfsdi_refcnt = 0;
5160                         ndi->nfsdi_flags = NFSDI_FLEXFILE;
5161                         addrcnt = ndi->nfsdi_addrcnt = 1;
5162                 }
5163
5164                 /* Now, dissect the server address(es). */
5165                 safilled = AF_UNSPEC;
5166                 for (i = 0; i < addrcnt; i++) {
5167                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5168                         cnt = fxdr_unsigned(uint32_t, *tl);
5169                         if (cnt == 0) {
5170                                 printf("NFS devinfo 0 len addrlist\n");
5171                                 error = NFSERR_BADXDR;
5172                                 goto nfsmout;
5173                         }
5174                         dspp = nfsfldi_addr(ndi, i);
5175                         safilled = AF_UNSPEC;
5176                         for (j = 0; j < cnt; j++) {
5177                                 error = nfsv4_getipaddr(nd, &sin, &sin6, &af,
5178                                     &isudp);
5179                                 if (error != 0 && error != EPERM) {
5180                                         error = NFSERR_BADXDR;
5181                                         goto nfsmout;
5182                                 }
5183                                 if (error == 0 && isudp == 0) {
5184                                         /*
5185                                          * The priority is:
5186                                          * - Same address family.
5187                                          * Save the address and dspp, so that
5188                                          * the connection can be done after
5189                                          * parsing is complete.
5190                                          */
5191                                         if (safilled == AF_UNSPEC ||
5192                                             (af == nmp->nm_nam->sa_family &&
5193                                              safilled != nmp->nm_nam->sa_family)
5194                                            ) {
5195                                                 if (af == AF_INET)
5196                                                         ssin = sin;
5197                                                 else
5198                                                         ssin6 = sin6;
5199                                                 safilled = af;
5200                                                 gotdspp = dspp;
5201                                         }
5202                                 }
5203                         }
5204                 }
5205
5206                 gotvers = NFS_VER4;     /* Default NFSv4.1 for File Layout. */
5207                 gotminor = NFSV41_MINORVERSION;
5208                 /* For Flex File, we will take one of the versions to use. */
5209                 if (layouttype == NFSLAYOUT_FLEXFILE) {
5210                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5211                         j = fxdr_unsigned(int, *tl);
5212                         if (j < 1 || j > NFSDEV_MAXVERS) {
5213                                 printf("pNFS: too many versions\n");
5214                                 error = NFSERR_BADXDR;
5215                                 goto nfsmout;
5216                         }
5217                         gotvers = 0;
5218                         gotminor = 0;
5219                         for (i = 0; i < j; i++) {
5220                                 NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
5221                                 vers = fxdr_unsigned(uint32_t, *tl++);
5222                                 minorvers = fxdr_unsigned(uint32_t, *tl++);
5223                                 if (vers == NFS_VER3)
5224                                         minorvers = 0;
5225                                 if ((vers == NFS_VER4 && ((minorvers ==
5226                                     NFSV41_MINORVERSION && gotminor == 0) ||
5227                                     minorvers == NFSV42_MINORVERSION)) ||
5228                                     (vers == NFS_VER3 && gotvers == 0)) {
5229                                         gotvers = vers;
5230                                         gotminor = minorvers;
5231                                         /* We'll take this one. */
5232                                         ndi->nfsdi_versindex = i;
5233                                         ndi->nfsdi_vers = vers;
5234                                         ndi->nfsdi_minorvers = minorvers;
5235                                         ndi->nfsdi_rsize = fxdr_unsigned(
5236                                             uint32_t, *tl++);
5237                                         ndi->nfsdi_wsize = fxdr_unsigned(
5238                                             uint32_t, *tl++);
5239                                         if (*tl == newnfs_true)
5240                                                 ndi->nfsdi_flags |=
5241                                                     NFSDI_TIGHTCOUPLED;
5242                                         else
5243                                                 ndi->nfsdi_flags &=
5244                                                     ~NFSDI_TIGHTCOUPLED;
5245                                 }
5246                         }
5247                         if (gotvers == 0) {
5248                                 printf("pNFS: no NFSv3, NFSv4.1 or NFSv4.2\n");
5249                                 error = NFSERR_BADXDR;
5250                                 goto nfsmout;
5251                         }
5252                 }
5253
5254                 /* And the notify bits. */
5255                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5256                 bitcnt = fxdr_unsigned(int, *tl);
5257                 if (bitcnt > 0) {
5258                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5259                         if (notifybitsp != NULL)
5260                                 *notifybitsp =
5261                                     fxdr_unsigned(uint32_t, *tl);
5262                 }
5263                 if (safilled != AF_UNSPEC) {
5264                         KASSERT(ndi != NULL, ("ndi is NULL"));
5265                         *ndip = ndi;
5266                 } else
5267                         error = EPERM;
5268                 if (error == 0) {
5269                         /*
5270                          * Now we can do a TCP connection for the correct
5271                          * NFS version and IP address.
5272                          */
5273                         error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled,
5274                             gotvers, gotminor, &dsp, p);
5275                 }
5276                 if (error == 0) {
5277                         KASSERT(gotdspp != NULL, ("gotdspp is NULL"));
5278                         *gotdspp = dsp;
5279                 }
5280         }
5281         if (nd->nd_repstat != 0 && error == 0)
5282                 error = nd->nd_repstat;
5283 nfsmout:
5284         if (error != 0 && ndi != NULL)
5285                 nfscl_freedevinfo(ndi);
5286         m_freem(nd->nd_mrep);
5287         return (error);
5288 }
5289
5290 /*
5291  * Do the NFSv4.1 LayoutCommit.
5292  */
5293 int
5294 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5295     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5296     int layouttype, struct ucred *cred, NFSPROC_T *p, void *stuff)
5297 {
5298         uint32_t *tl;
5299         struct nfsrv_descript nfsd, *nd = &nfsd;
5300         int error;
5301
5302         nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL,
5303             0, 0);
5304         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5305             NFSX_STATEID);
5306         txdr_hyper(off, tl);
5307         tl += 2;
5308         txdr_hyper(len, tl);
5309         tl += 2;
5310         if (reclaim != 0)
5311                 *tl++ = newnfs_true;
5312         else
5313                 *tl++ = newnfs_false;
5314         *tl++ = txdr_unsigned(stateidp->seqid);
5315         *tl++ = stateidp->other[0];
5316         *tl++ = stateidp->other[1];
5317         *tl++ = stateidp->other[2];
5318         *tl++ = newnfs_true;
5319         if (lastbyte < off)
5320                 lastbyte = off;
5321         else if (lastbyte >= (off + len))
5322                 lastbyte = off + len - 1;
5323         txdr_hyper(lastbyte, tl);
5324         tl += 2;
5325         *tl++ = newnfs_false;
5326         *tl++ = txdr_unsigned(layouttype);
5327         /* All supported layouts are 0 length. */
5328         *tl = txdr_unsigned(0);
5329         nd->nd_flag |= ND_USEGSSNAME;
5330         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5331             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5332         if (error != 0)
5333                 return (error);
5334         error = nd->nd_repstat;
5335         m_freem(nd->nd_mrep);
5336         return (error);
5337 }
5338
5339 /*
5340  * Do the NFSv4.1 LayoutReturn.
5341  */
5342 int
5343 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5344     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5345     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5346     uint32_t stat, uint32_t op, char *devid)
5347 {
5348         uint32_t *tl;
5349         struct nfsrv_descript nfsd, *nd = &nfsd;
5350         uint64_t tu64;
5351         int error;
5352
5353         nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL,
5354             0, 0);
5355         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5356         if (reclaim != 0)
5357                 *tl++ = newnfs_true;
5358         else
5359                 *tl++ = newnfs_false;
5360         *tl++ = txdr_unsigned(layouttype);
5361         *tl++ = txdr_unsigned(iomode);
5362         *tl = txdr_unsigned(layoutreturn);
5363         if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5364                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5365                     NFSX_UNSIGNED);
5366                 txdr_hyper(offset, tl);
5367                 tl += 2;
5368                 txdr_hyper(len, tl);
5369                 tl += 2;
5370                 NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5371                 *tl++ = txdr_unsigned(stateidp->seqid);
5372                 *tl++ = stateidp->other[0];
5373                 *tl++ = stateidp->other[1];
5374                 *tl++ = stateidp->other[2];
5375                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
5376                         *tl = txdr_unsigned(0);
5377                 else if (layouttype == NFSLAYOUT_FLEXFILE) {
5378                         if (stat != 0) {
5379                                 *tl = txdr_unsigned(2 * NFSX_HYPER +
5380                                     NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5381                                     NFSX_UNSIGNED);
5382                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER +
5383                                     NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5384                                     NFSX_UNSIGNED);
5385                                 *tl++ = txdr_unsigned(1);       /* One error. */
5386                                 tu64 = 0;                       /* Offset. */
5387                                 txdr_hyper(tu64, tl); tl += 2;
5388                                 tu64 = UINT64_MAX;              /* Length. */
5389                                 txdr_hyper(tu64, tl); tl += 2;
5390                                 NFSBCOPY(stateidp, tl, NFSX_STATEID);
5391                                 tl += (NFSX_STATEID / NFSX_UNSIGNED);
5392                                 *tl++ = txdr_unsigned(1);       /* One error. */
5393                                 NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5394                                 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5395                                 *tl++ = txdr_unsigned(stat);
5396                                 *tl++ = txdr_unsigned(op);
5397                         } else {
5398                                 *tl = txdr_unsigned(2 * NFSX_UNSIGNED);
5399                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5400                                 /* No ioerrs. */
5401                                 *tl++ = 0;
5402                         }
5403                         *tl = 0;        /* No stats yet. */
5404                 }
5405         }
5406         nd->nd_flag |= ND_USEGSSNAME;
5407         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5408             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5409         if (error != 0)
5410                 return (error);
5411         if (nd->nd_repstat == 0) {
5412                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5413                 if (*tl != 0) {
5414                         NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5415                         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5416                         stateidp->other[0] = *tl++;
5417                         stateidp->other[1] = *tl++;
5418                         stateidp->other[2] = *tl;
5419                 }
5420         } else
5421                 error = nd->nd_repstat;
5422 nfsmout:
5423         m_freem(nd->nd_mrep);
5424         return (error);
5425 }
5426
5427 /*
5428  * Acquire a layout and devinfo, if possible. The caller must have acquired
5429  * a reference count on the nfsclclient structure before calling this.
5430  * Return the layout in lypp with a reference count on it, if successful.
5431  */
5432 static int
5433 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5434     int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
5435     struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5436 {
5437         struct nfscllayout *lyp;
5438         struct nfsclflayout *flp;
5439         struct nfsclflayouthead flh;
5440         int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
5441         nfsv4stateid_t stateid;
5442         struct nfsclsession *tsep;
5443
5444         *lypp = NULL;
5445         if (NFSHASFLEXFILE(nmp))
5446                 layouttype = NFSLAYOUT_FLEXFILE;
5447         else
5448                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5449         /*
5450          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5451          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5452          * flp == NULL.
5453          */
5454         lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5455             off, &flp, &recalled);
5456         islocked = 0;
5457         if (lyp == NULL || flp == NULL) {
5458                 if (recalled != 0)
5459                         return (EIO);
5460                 LIST_INIT(&flh);
5461                 tsep = nfsmnt_mdssession(nmp);
5462                 layoutlen = tsep->nfsess_maxcache -
5463                     (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5464                 if (lyp == NULL) {
5465                         stateid.seqid = 0;
5466                         stateid.other[0] = stateidp->other[0];
5467                         stateid.other[1] = stateidp->other[1];
5468                         stateid.other[2] = stateidp->other[2];
5469                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5470                             nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5471                             (uint64_t)0, layouttype, layoutlen, &stateid,
5472                             &retonclose, &flh, cred, p, NULL);
5473                 } else {
5474                         islocked = 1;
5475                         stateid.seqid = lyp->nfsly_stateid.seqid;
5476                         stateid.other[0] = lyp->nfsly_stateid.other[0];
5477                         stateid.other[1] = lyp->nfsly_stateid.other[1];
5478                         stateid.other[2] = lyp->nfsly_stateid.other[2];
5479                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5480                             nfhp->nfh_len, iomode, off, UINT64_MAX,
5481                             (uint64_t)0, layouttype, layoutlen, &stateid,
5482                             &retonclose, &flh, cred, p, NULL);
5483                 }
5484                 error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5485                     nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5486                     &flh, layouttype, error, NULL, cred, p);
5487                 if (error == 0)
5488                         *lypp = lyp;
5489                 else if (islocked != 0)
5490                         nfscl_rellayout(lyp, 1);
5491         } else
5492                 *lypp = lyp;
5493         return (error);
5494 }
5495
5496 /*
5497  * Do a TCP connection plus exchange id and create session.
5498  * If successful, a "struct nfsclds" is linked into the list for the
5499  * mount point and a pointer to it is returned.
5500  */
5501 static int
5502 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
5503     struct sockaddr_in6 *sin6, sa_family_t af, int vers, int minorvers,
5504     struct nfsclds **dspp, NFSPROC_T *p)
5505 {
5506         struct sockaddr_in *msad, *sad;
5507         struct sockaddr_in6 *msad6, *sad6;
5508         struct nfsclclient *clp;
5509         struct nfssockreq *nrp;
5510         struct nfsclds *dsp, *tdsp;
5511         int error, firsttry;
5512         enum nfsclds_state retv;
5513         uint32_t sequenceid = 0;
5514
5515         KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5516             ("nfsrpc_fillsa: NULL nr_cred"));
5517         NFSLOCKCLSTATE();
5518         clp = nmp->nm_clp;
5519         NFSUNLOCKCLSTATE();
5520         if (clp == NULL)
5521                 return (EPERM);
5522         if (af == AF_INET) {
5523                 NFSLOCKMNT(nmp);
5524                 /*
5525                  * Check to see if we already have a session for this
5526                  * address that is usable for a DS.
5527                  * Note that the MDS's address is in a different place
5528                  * than the sessions already acquired for DS's.
5529                  */
5530                 msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5531                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
5532                 while (tdsp != NULL) {
5533                         if (msad != NULL && msad->sin_family == AF_INET &&
5534                             sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
5535                             sin->sin_port == msad->sin_port &&
5536                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5537                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
5538                                 *dspp = tdsp;
5539                                 NFSUNLOCKMNT(nmp);
5540                                 NFSCL_DEBUG(4, "fnd same addr\n");
5541                                 return (0);
5542                         }
5543                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5544                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5545                                 msad = (struct sockaddr_in *)
5546                                     tdsp->nfsclds_sockp->nr_nam;
5547                         else
5548                                 msad = NULL;
5549                 }
5550                 NFSUNLOCKMNT(nmp);
5551
5552                 /* No IP address match, so look for new/trunked one. */
5553                 sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
5554                 sad->sin_len = sizeof(*sad);
5555                 sad->sin_family = AF_INET;
5556                 sad->sin_port = sin->sin_port;
5557                 sad->sin_addr.s_addr = sin->sin_addr.s_addr;
5558                 nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5559                 nrp->nr_nam = (struct sockaddr *)sad;
5560         } else if (af == AF_INET6) {
5561                 NFSLOCKMNT(nmp);
5562                 /*
5563                  * Check to see if we already have a session for this
5564                  * address that is usable for a DS.
5565                  * Note that the MDS's address is in a different place
5566                  * than the sessions already acquired for DS's.
5567                  */
5568                 msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
5569                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
5570                 while (tdsp != NULL) {
5571                         if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
5572                             IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
5573                             &msad6->sin6_addr) &&
5574                             sin6->sin6_port == msad6->sin6_port &&
5575                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5576                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
5577                                 *dspp = tdsp;
5578                                 NFSUNLOCKMNT(nmp);
5579                                 return (0);
5580                         }
5581                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5582                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5583                                 msad6 = (struct sockaddr_in6 *)
5584                                     tdsp->nfsclds_sockp->nr_nam;
5585                         else
5586                                 msad6 = NULL;
5587                 }
5588                 NFSUNLOCKMNT(nmp);
5589
5590                 /* No IP address match, so look for new/trunked one. */
5591                 sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
5592                 sad6->sin6_len = sizeof(*sad6);
5593                 sad6->sin6_family = AF_INET6;
5594                 sad6->sin6_port = sin6->sin6_port;
5595                 NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
5596                     sizeof(struct in6_addr));
5597                 nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5598                 nrp->nr_nam = (struct sockaddr *)sad6;
5599         } else
5600                 return (EPERM);
5601
5602         nrp->nr_sotype = SOCK_STREAM;
5603         mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
5604         nrp->nr_prog = NFS_PROG;
5605         nrp->nr_vers = vers;
5606
5607         /*
5608          * Use the credentials that were used for the mount, which are
5609          * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
5610          * Ref. counting the credentials with crhold() is probably not
5611          * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
5612          * unmount, but I did it anyhow.
5613          */
5614         nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
5615         error = newnfs_connect(nmp, nrp, NULL, p, 0, false);
5616         NFSCL_DEBUG(3, "DS connect=%d\n", error);
5617
5618         dsp = NULL;
5619         /* Now, do the exchangeid and create session. */
5620         if (error == 0) {
5621                 if (vers == NFS_VER4) {
5622                         firsttry = 0;
5623                         do {
5624                                 error = nfsrpc_exchangeid(nmp, clp, nrp, 
5625                                     minorvers, NFSV4EXCH_USEPNFSDS, &dsp,
5626                                     nrp->nr_cred, p);
5627                                 NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
5628                                 if (error == NFSERR_MINORVERMISMATCH)
5629                                         minorvers = NFSV42_MINORVERSION;
5630                         } while (error == NFSERR_MINORVERMISMATCH &&
5631                             firsttry++ == 0);
5632                         if (error != 0)
5633                                 newnfs_disconnect(nrp);
5634                 } else {
5635                         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
5636                             M_WAITOK | M_ZERO);
5637                         dsp->nfsclds_flags |= NFSCLDS_DS;
5638                         dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
5639                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5640                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5641                             NULL, MTX_DEF);
5642                 }
5643         }
5644         if (error == 0) {
5645                 dsp->nfsclds_sockp = nrp;
5646                 if (vers == NFS_VER4) {
5647                         NFSLOCKMNT(nmp);
5648                         retv = nfscl_getsameserver(nmp, dsp, &tdsp,
5649                             &sequenceid);
5650                         NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
5651                         if (retv == NFSDSP_USETHISSESSION &&
5652                             nfscl_dssameconn != 0) {
5653                                 NFSLOCKDS(tdsp);
5654                                 tdsp->nfsclds_flags |= NFSCLDS_SAMECONN;
5655                                 NFSUNLOCKDS(tdsp);
5656                                 NFSUNLOCKMNT(nmp);
5657                                 /*
5658                                  * If there is already a session for this
5659                                  * server, use it.
5660                                  */
5661                                 (void)newnfs_disconnect(nrp);
5662                                 nfscl_freenfsclds(dsp);
5663                                 *dspp = tdsp;
5664                                 return (0);
5665                         }
5666                         if (retv == NFSDSP_NOTFOUND)
5667                                 sequenceid =
5668                                     dsp->nfsclds_sess.nfsess_sequenceid;
5669                         NFSUNLOCKMNT(nmp);
5670                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
5671                             nrp, dsp, sequenceid, 0, nrp->nr_cred, p);
5672                         NFSCL_DEBUG(3, "DS createsess=%d\n", error);
5673                 }
5674         } else {
5675                 NFSFREECRED(nrp->nr_cred);
5676                 NFSFREEMUTEX(&nrp->nr_mtx);
5677                 free(nrp->nr_nam, M_SONAME);
5678                 free(nrp, M_NFSSOCKREQ);
5679         }
5680         if (error == 0) {
5681                 NFSCL_DEBUG(3, "add DS session\n");
5682                 /*
5683                  * Put it at the end of the list. That way the list
5684                  * is ordered by when the entry was added. This matters
5685                  * since the one done first is the one that should be
5686                  * used for sequencid'ing any subsequent create sessions.
5687                  */
5688                 NFSLOCKMNT(nmp);
5689                 TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
5690                 NFSUNLOCKMNT(nmp);
5691                 *dspp = dsp;
5692         } else if (dsp != NULL) {
5693                 newnfs_disconnect(nrp);
5694                 nfscl_freenfsclds(dsp);
5695         }
5696         return (error);
5697 }
5698
5699 /*
5700  * Do the NFSv4.1 Reclaim Complete.
5701  */
5702 int
5703 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
5704 {
5705         uint32_t *tl;
5706         struct nfsrv_descript nfsd;
5707         struct nfsrv_descript *nd = &nfsd;
5708         int error;
5709
5710         nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0,
5711             0);
5712         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5713         *tl = newnfs_false;
5714         nd->nd_flag |= ND_USEGSSNAME;
5715         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5716             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5717         if (error != 0)
5718                 return (error);
5719         error = nd->nd_repstat;
5720         m_freem(nd->nd_mrep);
5721         return (error);
5722 }
5723
5724 /*
5725  * Initialize the slot tables for a session.
5726  */
5727 static void
5728 nfscl_initsessionslots(struct nfsclsession *sep)
5729 {
5730         int i;
5731
5732         for (i = 0; i < NFSV4_CBSLOTS; i++) {
5733                 if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
5734                         m_freem(sep->nfsess_cbslots[i].nfssl_reply);
5735                 NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
5736         }
5737         for (i = 0; i < 64; i++)
5738                 sep->nfsess_slotseq[i] = 0;
5739         sep->nfsess_slots = 0;
5740 }
5741
5742 /*
5743  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
5744  */
5745 int
5746 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5747     uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
5748 {
5749         struct nfsnode *np = VTONFS(vp);
5750         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5751         struct nfscllayout *layp;
5752         struct nfscldevinfo *dip;
5753         struct nfsclflayout *rflp;
5754         struct mbuf *m, *m2;
5755         struct nfsclwritedsdorpc *drpc, *tdrpc;
5756         nfsv4stateid_t stateid;
5757         struct ucred *newcred;
5758         uint64_t lastbyte, len, off, oresid, xfer;
5759         int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo;
5760         void *lckp;
5761         uint8_t *dev;
5762         void *iovbase = NULL;
5763         size_t iovlen = 0;
5764         off_t offs = 0;
5765         ssize_t resid = 0;
5766
5767         if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5768             (np->n_flag & NNOLAYOUT) != 0)
5769                 return (EIO);
5770         /* Now, get a reference cnt on the clientid for this mount. */
5771         if (nfscl_getref(nmp) == 0)
5772                 return (EIO);
5773
5774         /* Find an appropriate stateid. */
5775         newcred = NFSNEWCRED(cred);
5776         error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
5777             rwaccess, 1, newcred, p, &stateid, &lckp);
5778         if (error != 0) {
5779                 NFSFREECRED(newcred);
5780                 nfscl_relref(nmp);
5781                 return (error);
5782         }
5783         /* Search for a layout for this file. */
5784         off = uiop->uio_offset;
5785         layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
5786             np->n_fhp->nfh_len, off, &rflp, &recalled);
5787         if (layp == NULL || rflp == NULL) {
5788                 if (recalled != 0) {
5789                         NFSFREECRED(newcred);
5790                         nfscl_relref(nmp);
5791                         return (EIO);
5792                 }
5793                 if (layp != NULL) {
5794                         nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
5795                         layp = NULL;
5796                 }
5797                 /* Try and get a Layout, if it is supported. */
5798                 if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
5799                     (np->n_flag & NWRITEOPENED) != 0)
5800                         iolaymode = NFSLAYOUTIOMODE_RW;
5801                 else
5802                         iolaymode = NFSLAYOUTIOMODE_READ;
5803                 error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
5804                     NULL, &stateid, off, &layp, newcred, p);
5805                 if (error != 0) {
5806                         NFSLOCKNODE(np);
5807                         np->n_flag |= NNOLAYOUT;
5808                         NFSUNLOCKNODE(np);
5809                         if (lckp != NULL)
5810                                 nfscl_lockderef(lckp);
5811                         NFSFREECRED(newcred);
5812                         if (layp != NULL)
5813                                 nfscl_rellayout(layp, 0);
5814                         nfscl_relref(nmp);
5815                         return (error);
5816                 }
5817         }
5818
5819         /*
5820          * Loop around finding a layout that works for the first part of
5821          * this I/O operation, and then call the function that actually
5822          * does the RPC.
5823          */
5824         eof = 0;
5825         len = (uint64_t)uiop->uio_resid;
5826         while (len > 0 && error == 0 && eof == 0) {
5827                 off = uiop->uio_offset;
5828                 error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
5829                 if (error == 0) {
5830                         oresid = xfer = (uint64_t)uiop->uio_resid;
5831                         if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
5832                                 xfer = rflp->nfsfl_end - rflp->nfsfl_off;
5833                         /*
5834                          * For Flex File layout with mirrored DSs, select one
5835                          * of them at random for reads. For writes and commits,
5836                          * do all mirrors.
5837                          */
5838                         m = NULL;
5839                         tdrpc = drpc = NULL;
5840                         firstmirror = 0;
5841                         mirrorcnt = 1;
5842                         if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
5843                             (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
5844                                 if (rwaccess == NFSV4OPEN_ACCESSREAD) {
5845                                         firstmirror = arc4random() % mirrorcnt;
5846                                         mirrorcnt = firstmirror + 1;
5847                                 } else {
5848                                         if (docommit == 0) {
5849                                                 /*
5850                                                  * Save values, so uiop can be
5851                                                  * rolled back upon a write
5852                                                  * error.
5853                                                  */
5854                                                 offs = uiop->uio_offset;
5855                                                 resid = uiop->uio_resid;
5856                                                 iovbase =
5857                                                     uiop->uio_iov->iov_base;
5858                                                 iovlen = uiop->uio_iov->iov_len;
5859                                                 m = nfsm_uiombuflist(uiop, len,
5860                                                     0);
5861                                         }
5862                                         tdrpc = drpc = malloc(sizeof(*drpc) *
5863                                             (mirrorcnt - 1), M_TEMP, M_WAITOK |
5864                                             M_ZERO);
5865                                 }
5866                         }
5867                         for (i = firstmirror; i < mirrorcnt && error == 0; i++){
5868                                 m2 = NULL;
5869                                 if (m != NULL && i < mirrorcnt - 1)
5870                                         m2 = m_copym(m, 0, M_COPYALL, M_WAITOK);
5871                                 else {
5872                                         m2 = m;
5873                                         m = NULL;
5874                                 }
5875                                 if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0) {
5876                                         dev = rflp->nfsfl_ffm[i].dev;
5877                                         dip = nfscl_getdevinfo(nmp->nm_clp, dev,
5878                                             rflp->nfsfl_ffm[i].devp);
5879                                 } else {
5880                                         dev = rflp->nfsfl_dev;
5881                                         dip = nfscl_getdevinfo(nmp->nm_clp, dev,
5882                                             rflp->nfsfl_devp);
5883                                 }
5884                                 if (dip != NULL) {
5885                                         if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
5886                                             != 0)
5887                                                 error = nfscl_dofflayoutio(vp,
5888                                                     uiop, iomode, must_commit,
5889                                                     &eof, &stateid, rwaccess,
5890                                                     dip, layp, rflp, off, xfer,
5891                                                     i, docommit, m2, tdrpc,
5892                                                     newcred, p);
5893                                         else
5894                                                 error = nfscl_doflayoutio(vp,
5895                                                     uiop, iomode, must_commit,
5896                                                     &eof, &stateid, rwaccess,
5897                                                     dip, layp, rflp, off, xfer,
5898                                                     docommit, newcred, p);
5899                                         nfscl_reldevinfo(dip);
5900                                 } else {
5901                                         if (m2 != NULL)
5902                                                 m_freem(m2);
5903                                         error = EIO;
5904                                 }
5905                                 tdrpc++;
5906                         }
5907                         if (m != NULL)
5908                                 m_freem(m);
5909                         tdrpc = drpc;
5910                         timo = hz / 50;         /* Wait for 20msec. */
5911                         if (timo < 1)
5912                                 timo = 1;
5913                         for (i = firstmirror; i < mirrorcnt - 1 &&
5914                             tdrpc != NULL; i++, tdrpc++) {
5915                                 /*
5916                                  * For the unused drpc entries, both inprog and
5917                                  * err == 0, so this loop won't break.
5918                                  */
5919                                 while (tdrpc->inprog != 0 && tdrpc->done == 0)
5920                                         tsleep(&tdrpc->tsk, PVFS, "clrpcio",
5921                                             timo);
5922                                 if (error == 0 && tdrpc->err != 0)
5923                                         error = tdrpc->err;
5924                         }
5925                         free(drpc, M_TEMP);
5926                         if (error == 0) {
5927                                 if (mirrorcnt > 1 && rwaccess ==
5928                                     NFSV4OPEN_ACCESSWRITE && docommit == 0) {
5929                                         NFSLOCKCLSTATE();
5930                                         layp->nfsly_flags |= NFSLY_WRITTEN;
5931                                         NFSUNLOCKCLSTATE();
5932                                 }
5933                                 lastbyte = off + xfer - 1;
5934                                 NFSLOCKCLSTATE();
5935                                 if (lastbyte > layp->nfsly_lastbyte)
5936                                         layp->nfsly_lastbyte = lastbyte;
5937                                 NFSUNLOCKCLSTATE();
5938                         } else if (error == NFSERR_OPENMODE &&
5939                             rwaccess == NFSV4OPEN_ACCESSREAD) {
5940                                 NFSLOCKMNT(nmp);
5941                                 nmp->nm_state |= NFSSTA_OPENMODE;
5942                                 NFSUNLOCKMNT(nmp);
5943                         } else
5944                                 error = EIO;
5945                         if (error == 0)
5946                                 len -= (oresid - (uint64_t)uiop->uio_resid);
5947                         else if (mirrorcnt > 1 && rwaccess ==
5948                             NFSV4OPEN_ACCESSWRITE && docommit == 0) {
5949                                 /*
5950                                  * In case the rpc gets retried, roll the
5951                                  * uio fields changed by nfsm_uiombuflist()
5952                                  * back.
5953                                  */
5954                                 uiop->uio_offset = offs;
5955                                 uiop->uio_resid = resid;
5956                                 uiop->uio_iov->iov_base = iovbase;
5957                                 uiop->uio_iov->iov_len = iovlen;
5958                         }
5959                 }
5960         }
5961         if (lckp != NULL)
5962                 nfscl_lockderef(lckp);
5963         NFSFREECRED(newcred);
5964         nfscl_rellayout(layp, 0);
5965         nfscl_relref(nmp);
5966         return (error);
5967 }
5968
5969 /*
5970  * Find a file layout that will handle the first bytes of the requested
5971  * range and return the information from it needed to the I/O operation.
5972  */
5973 int
5974 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
5975     struct nfsclflayout **retflpp)
5976 {
5977         struct nfsclflayout *flp, *nflp, *rflp;
5978         uint32_t rw;
5979
5980         rflp = NULL;
5981         rw = rwaccess;
5982         /* For reading, do the Read list first and then the Write list. */
5983         do {
5984                 if (rw == NFSV4OPEN_ACCESSREAD)
5985                         flp = LIST_FIRST(&lyp->nfsly_flayread);
5986                 else
5987                         flp = LIST_FIRST(&lyp->nfsly_flayrw);
5988                 while (flp != NULL) {
5989                         nflp = LIST_NEXT(flp, nfsfl_list);
5990                         if (flp->nfsfl_off > off)
5991                                 break;
5992                         if (flp->nfsfl_end > off &&
5993                             (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
5994                                 rflp = flp;
5995                         flp = nflp;
5996                 }
5997                 if (rw == NFSV4OPEN_ACCESSREAD)
5998                         rw = NFSV4OPEN_ACCESSWRITE;
5999                 else
6000                         rw = 0;
6001         } while (rw != 0);
6002         if (rflp != NULL) {
6003                 /* This one covers the most bytes starting at off. */
6004                 *retflpp = rflp;
6005                 return (0);
6006         }
6007         return (EIO);
6008 }
6009
6010 /*
6011  * Do I/O using an NFSv4.1 or NFSv4.2 file layout.
6012  */
6013 static int
6014 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6015     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6016     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6017     uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
6018 {
6019         uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
6020         int commit_thru_mds, error, stripe_index, stripe_pos, minorvers;
6021         struct nfsnode *np;
6022         struct nfsfh *fhp;
6023         struct nfsclds **dspp;
6024
6025         np = VTONFS(vp);
6026         rel_off = off - flp->nfsfl_patoff;
6027         stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff;
6028         stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
6029             dp->nfsdi_stripecnt;
6030         transfer = stripe_unit_size - (rel_off % stripe_unit_size);
6031         error = 0;
6032
6033         /* Loop around, doing I/O for each stripe unit. */
6034         while (len > 0 && error == 0) {
6035                 stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
6036                 dspp = nfsfldi_addr(dp, stripe_index);
6037                 if (((*dspp)->nfsclds_flags & NFSCLDS_MINORV2) != 0)
6038                         minorvers = NFSV42_MINORVERSION;
6039                 else
6040                         minorvers = NFSV41_MINORVERSION;
6041                 if (len > transfer && docommit == 0)
6042                         xfer = transfer;
6043                 else
6044                         xfer = len;
6045                 if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
6046                         /* Dense layout. */
6047                         if (stripe_pos >= flp->nfsfl_fhcnt)
6048                                 return (EIO);
6049                         fhp = flp->nfsfl_fh[stripe_pos];
6050                         io_off = (rel_off / (stripe_unit_size *
6051                             dp->nfsdi_stripecnt)) * stripe_unit_size +
6052                             rel_off % stripe_unit_size;
6053                 } else {
6054                         /* Sparse layout. */
6055                         if (flp->nfsfl_fhcnt > 1) {
6056                                 if (stripe_index >= flp->nfsfl_fhcnt)
6057                                         return (EIO);
6058                                 fhp = flp->nfsfl_fh[stripe_index];
6059                         } else if (flp->nfsfl_fhcnt == 1)
6060                                 fhp = flp->nfsfl_fh[0];
6061                         else
6062                                 fhp = np->n_fhp;
6063                         io_off = off;
6064                 }
6065                 if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
6066                         commit_thru_mds = 1;
6067                         if (docommit != 0)
6068                                 error = EIO;
6069                 } else {
6070                         commit_thru_mds = 0;
6071                         NFSLOCKNODE(np);
6072                         np->n_flag |= NDSCOMMIT;
6073                         NFSUNLOCKNODE(np);
6074                 }
6075                 if (docommit != 0) {
6076                         if (error == 0)
6077                                 error = nfsrpc_commitds(vp, io_off, xfer,
6078                                     *dspp, fhp, NFS_VER4, minorvers, cred, p);
6079                         if (error == 0) {
6080                                 /*
6081                                  * Set both eof and uio_resid = 0 to end any
6082                                  * loops.
6083                                  */
6084                                 *eofp = 1;
6085                                 uiop->uio_resid = 0;
6086                         } else {
6087                                 NFSLOCKNODE(np);
6088                                 np->n_flag &= ~NDSCOMMIT;
6089                                 NFSUNLOCKNODE(np);
6090                         }
6091                 } else if (rwflag == NFSV4OPEN_ACCESSREAD)
6092                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6093                             io_off, xfer, fhp, 0, NFS_VER4, minorvers, cred, p);
6094                 else {
6095                         error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
6096                             stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
6097                             0, NFS_VER4, minorvers, cred, p);
6098                         if (error == 0) {
6099                                 NFSLOCKCLSTATE();
6100                                 lyp->nfsly_flags |= NFSLY_WRITTEN;
6101                                 NFSUNLOCKCLSTATE();
6102                         }
6103                 }
6104                 if (error == 0) {
6105                         transfer = stripe_unit_size;
6106                         stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
6107                         len -= xfer;
6108                         off += xfer;
6109                 }
6110         }
6111         return (error);
6112 }
6113
6114 /*
6115  * Do I/O using an NFSv4.1 flex file layout.
      *
      * The "len" bytes starting at "off" are transferred to/from the data
      * server (DS) for mirror number "mirror" of the layout segment "flp",
      * in pieces of at most dp->nfsdi_rsize (Read) or dp->nfsdi_wsize (Write)
      * bytes.  When "docommit" is non-zero a single Commit of the whole range
      * is done instead.
      *
      * The file handle and stateid are taken from flp->nfsfl_ffm[mirror], so
      * the value passed in via "stateidp" is not used.  Unless the DS is
      * marked NFSDI_TIGHTCOUPLED, a copy of "cred" with the uid/gid from the
      * mirror is used for the RPCs.
      *
      * For a Write with more than one mirror, the data comes from the mbuf
      * list "mp" rather than "uiop" and every mirror except the last one is
      * handed to nfsio_writedsmir() along with "drpc".
      *
      * RPC errors other than EACCES and ESTALE are reported via nfscl_dserr().
      * Returns 0 or the error that ended the loop.
6116  */
6117 static int
6118 nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6119     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6120     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6121     uint64_t len, int mirror, int docommit, struct mbuf *mp,
6122     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6123 {
6124         uint64_t xfer;
6125         int error;
6126         struct nfsnode *np;
6127         struct nfsfh *fhp;
6128         struct nfsclds **dspp;
6129         struct ucred *tcred;
6130         struct mbuf *m, *m2;
6131         uint32_t copylen;
6132 
6133         np = VTONFS(vp);
6134         error = 0;
6135         NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
6136             (uintmax_t)len);
6137         /* Loop around, doing I/O for each stripe unit. */
6138         while (len > 0 && error == 0) {
6139                 dspp = nfsfldi_addr(dp, 0);
6140                 fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
6141                 stateidp = &flp->nfsfl_ffm[mirror].st;
6142                 NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
6143                     mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
                     /*
                      * Not tightly coupled: do the RPC with the uid/gid that
                      * the layout supplied for this mirror.
                      */
6144                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
6145                         tcred = NFSNEWCRED(cred);
6146                         tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
6147                         tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
6148                         tcred->cr_ngroups = 1;
6149                 } else
6150                         tcred = cred;
6151                 if (rwflag == NFSV4OPEN_ACCESSREAD)
6152                         copylen = dp->nfsdi_rsize;
6153                 else {
6154                         copylen = dp->nfsdi_wsize;
6155                         if (len > copylen && mp != NULL) {
6156                                 /*
6157                                  * When a mirrored configuration needs to do
6158                                  * multiple writes to each mirror, all writes
6159                                  * except the last one must be a multiple of
6160                                  * 4 bytes.  This is required so that the XDR
6161                                  * does not need padding.
6162                                  * If possible, clip the size to an exact
6163                                  * multiple of the mbuf length, so that the
6164                                  * split will be on an mbuf boundary.
6165                                  */
6166                                 copylen &= 0xfffffffc;
6167                                 if (copylen > mp->m_len)
6168                                         copylen = copylen / mp->m_len *
6169                                             mp->m_len;
6170                         }
6171                 }
6172                 NFSLOCKNODE(np);
6173                 np->n_flag |= NDSCOMMIT;
6174                 NFSUNLOCKNODE(np);
                     /* A Commit is always done for the whole remaining range. */
6175                 if (len > copylen && docommit == 0)
6176                         xfer = copylen;
6177                 else
6178                         xfer = len;
6179                 if (docommit != 0) {
6180                         if (error == 0) {
6181                                 /*
6182                                  * Do last mirrored DS commit with this thread.
6183                                  */
6184                                 if (mirror < flp->nfsfl_mirrorcnt - 1)
6185                                         error = nfsio_commitds(vp, off, xfer,
6186                                             *dspp, fhp, dp->nfsdi_vers,
6187                                             dp->nfsdi_minorvers, drpc, tcred,
6188                                             p);
6189                                 else
6190                                         error = nfsrpc_commitds(vp, off, xfer,
6191                                             *dspp, fhp, dp->nfsdi_vers,
6192                                             dp->nfsdi_minorvers, tcred, p);
6193                                 NFSCL_DEBUG(4, "commitds=%d\n", error);
6194                                 if (error != 0 && error != EACCES && error !=
6195                                     ESTALE) {
6196                                         NFSCL_DEBUG(4,
6197                                             "DS layreterr for commit\n");
6198                                         nfscl_dserr(NFSV4OP_COMMIT, error, dp,
6199                                             lyp, *dspp);
6200                                 }
6201                         }
6202                         NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error);
6203                         if (error == 0) {
6204                                 /*
6205                                  * Set both eof and uio_resid = 0 to end any
6206                                  * loops.
6207                                  */
6208                                 *eofp = 1;
6209                                 uiop->uio_resid = 0;
6210                         } else {
                                     /* The commit failed, so undo the flag set above. */
6211                                 NFSLOCKNODE(np);
6212                                 np->n_flag &= ~NDSCOMMIT;
6213                                 NFSUNLOCKNODE(np);
6214                         }
6215                 } else if (rwflag == NFSV4OPEN_ACCESSREAD) {
6216                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6217                             off, xfer, fhp, 1, dp->nfsdi_vers,
6218                             dp->nfsdi_minorvers, tcred, p);
6219                         NFSCL_DEBUG(4, "readds=%d\n", error);
6220                         if (error != 0 && error != EACCES && error != ESTALE) {
6221                                 NFSCL_DEBUG(4, "DS layreterr for read\n");
6222                                 nfscl_dserr(NFSV4OP_READ, error, dp, lyp,
6223                                     *dspp);
6224                         }
6225                 } else {
6226                         if (flp->nfsfl_mirrorcnt == 1) {
                                     /* Not mirrored: write straight from the uio. */
6227                                 error = nfsrpc_writeds(vp, uiop, iomode,
6228                                     must_commit, stateidp, *dspp, off, xfer,
6229                                     fhp, 0, 1, dp->nfsdi_vers,
6230                                     dp->nfsdi_minorvers, tcred, p);
6231                                 if (error == 0) {
6232                                         NFSLOCKCLSTATE();
6233                                         lyp->nfsly_flags |= NFSLY_WRITTEN;
6234                                         NFSUNLOCKCLSTATE();
6235                                 }
6236                         } else {
                                     /*
                                      * Mirrored: "m" is the data for this RPC and
                                      * "mp" is advanced to whatever is left after
                                      * the split.
                                      */
6237                                 m = mp;
6238                                 if (xfer < len) {
6239                                         /* The mbuf list must be split. */
6240                                         m2 = nfsm_split(mp, xfer);
6241                                         if (m2 != NULL)
6242                                                 mp = m2;
6243                                         else {
6244                                                 m_freem(mp);
6245                                                 error = EIO;
6246                                         }
6247                                 }
6248                                 NFSCL_DEBUG(4, "mcopy len=%jd xfer=%jd\n",
6249                                     (uintmax_t)len, (uintmax_t)xfer);
6250                                 /*
6251                                  * Do last write to a mirrored DS with this
6252                                  * thread.
6253                                  */
6254                                 if (error == 0) {
6255                                         if (mirror < flp->nfsfl_mirrorcnt - 1)
6256                                                 error = nfsio_writedsmir(vp,
6257                                                     iomode, must_commit,
6258                                                     stateidp, *dspp, off,
6259                                                     xfer, fhp, m,
6260                                                     dp->nfsdi_vers,
6261                                                     dp->nfsdi_minorvers, drpc,
6262                                                     tcred, p);
6263                                         else
6264                                                 error = nfsrpc_writedsmir(vp,
6265                                                     iomode, must_commit,
6266                                                     stateidp, *dspp, off,
6267                                                     xfer, fhp, m,
6268                                                     dp->nfsdi_vers,
6269                                                     dp->nfsdi_minorvers, tcred,
6270                                                     p);
6271                                 }
6272                                 NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error);
6273                                 if (error != 0 && error != EACCES && error !=
6274                                     ESTALE) {
6275                                         NFSCL_DEBUG(4,
6276                                             "DS layreterr for write\n");
6277                                         nfscl_dserr(NFSV4OP_WRITE, error, dp,
6278                                             lyp, *dspp);
6279                                 }
6280                         }
6281                 }
6282                 NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
6283                 if (error == 0) {
6284                         len -= xfer;
6285                         off += xfer;
6286                 }
                     /*
                      * NOTE(review): nfsio_writedsmir() stores the credential
                      * pointer in "drpc" for start_writedsmir() to use later;
                      * confirm that a queued request cannot still be using
                      * tcred when it is released here.
                      */
6287                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
6288                         NFSFREECRED(tcred);
6289         }
6290         NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
6291         return (error);
6292 }
6293
6294 /*
6295  * The actual read RPC done to a DS.
      *
      * Builds a ReadDS request (when "vers" is 0 or NFS_VER4) or a Read
      * request (any other "vers") for "len" bytes at offset "io_off" of the
      * file handle "fhp", sends it on dsp->nfsclds_sockp (or on the MDS
      * socket when that is NULL) and copies the returned data into "uiop".
      * For the NFSv4 case, a non-zero "flex" puts the stateid in the request
      * with NFSSTATEID_PUTSTATEID, otherwise NFSSTATEID_PUTSEQIDZERO is used.
      * *eofp is set from the eof flag in the reply.
      *
      * Returns 0, the error from newnfs_request() or from parsing the reply,
      * or the status found in nd_repstat.
6296  */
6297 static int
6298 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
6299     struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
6300     int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
6301 {
6302         uint32_t *tl;
6303         int attrflag, error, retlen;
6304         struct nfsrv_descript nfsd;
6305         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6306         struct nfsrv_descript *nd = &nfsd;
6307         struct nfssockreq *nrp;
6308         struct nfsvattr na;
6309 
6310         nd->nd_mrep = NULL;
6311         if (vers == 0 || vers == NFS_VER4) {
6312                 nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
6313                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6314                 vers = NFS_VER4;
6315                 NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
6316                 if (flex != 0)
6317                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6318                 else
6319                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6320         } else {
6321                 nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
6322                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6323                 NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
6324         }
             /* Arguments: 64bit offset followed by the 32bit byte count. */
6325         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
6326         txdr_hyper(io_off, tl);
6327         *(tl + 2) = txdr_unsigned(len);
6328         nrp = dsp->nfsclds_sockp;
6329         NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
6330         if (nrp == NULL)
6331                 /* If NULL, use the MDS socket. */
6332                 nrp = &nmp->nm_sockreq;
6333         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6334             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6335         NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
6336             error);
6337         if (error != 0)
6338                 return (error);
             /* For NFSv3, the post-op attributes are parsed before the status check. */
6339         if (vers == NFS_VER3) {
6340                 error = nfscl_postop_attr(nd, &na, &attrflag, NULL);
6341                 NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
6342                 if (error != 0)
6343                         goto nfsmout;
6344         }
6345         if (nd->nd_repstat != 0) {
6346                 error = nd->nd_repstat;
6347                 goto nfsmout;
6348         }
             /*
              * NOTE(review): NFSM_DISSECT() and NFSM_STRSIZ() are defined
              * elsewhere; presumably they set error and jump to nfsmout for a
              * malformed reply - confirm.
              */
6349         if (vers == NFS_VER3) {
                     /* Two words are read; eof is taken from the second one. */
6350                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6351                 *eofp = fxdr_unsigned(int, *(tl + 1));
6352         } else {
6353                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6354                 *eofp = fxdr_unsigned(int, *tl);
6355         }
6356         NFSM_STRSIZ(retlen, len);
6357         NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
6358         error = nfsm_mbufuio(nd, uiop, retlen);
6359 nfsmout:
6360         if (nd->nd_mrep != NULL)
6361                 m_freem(nd->nd_mrep);
6362         return (error);
6363 }
6364
6365 /*
6366  * The actual write RPC done to a DS.
      *
      * Builds a WriteDS request (when "vers" is 0 or NFS_VER4) or a Write
      * request (any other "vers") for "len" bytes at offset "io_off" of the
      * file handle "fhp", with the data taken from "uiop", and sends it on
      * dsp->nfsclds_sockp (or on the MDS socket when that is NULL).
      *
      * If the server replies with an error status, the uio fields are rolled
      * back by "len".  If fewer than "len" bytes were written, they are
      * rolled back by the unwritten count.  A reply that reports zero bytes
      * written is turned into NFSERR_IO.
      *
      * The write verifier from the reply is saved in the mount point when
      * "commit_thru_mds" is non-zero, otherwise in "dsp".  *must_commit is set
      * to 1 when it differs from the previously saved one.
      *
      * Once a reply has been received, *iomode is set to the commitment level
      * from the reply, or to NFSWRITE_FILESYNC when that was not parsed.  It is
      * left alone when newnfs_request() fails.
      *
      * Returns 0, the error from newnfs_request() or from parsing the reply,
      * or the status found in nd_repstat.
6367  */
6368 static int
6369 nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6370     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6371     struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
6372     struct ucred *cred, NFSPROC_T *p)
6373 {
6374         uint32_t *tl;
6375         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6376         int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
6377         int32_t backup;
6378         struct nfsrv_descript nfsd;
6379         struct nfsrv_descript *nd = &nfsd;
6380         struct nfssockreq *nrp;
6381         struct nfsvattr na;
6382 
6383         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
6384         nd->nd_mrep = NULL;
6385         if (vers == 0 || vers == NFS_VER4) {
6386                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6387                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6388                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
6389                 vers = NFS_VER4;
6390                 if (flex != 0)
6391                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6392                 else
6393                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6394                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6395         } else {
6396                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6397                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6398                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
6399                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6400         }
             /*
              * Arguments: offset, an extra length word for NFSv3 only, the
              * requested stable-how (*iomode) and the data length.
              */
6401         txdr_hyper(io_off, tl);
6402         tl += 2;
6403         if (vers == NFS_VER3)
6404                 *tl++ = txdr_unsigned(len);
6405         *tl++ = txdr_unsigned(*iomode);
6406         *tl = txdr_unsigned(len);
6407         nfsm_uiombuf(nd, uiop, len);
6408         nrp = dsp->nfsclds_sockp;
6409         if (nrp == NULL)
6410                 /* If NULL, use the MDS socket. */
6411                 nrp = &nmp->nm_sockreq;
6412         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6413             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6414         NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
6415             nd->nd_repstat);
6416         if (error != 0)
6417                 return (error);
6418         if (nd->nd_repstat != 0) {
6419                 /*
6420                  * In case the rpc gets retried, roll
6421                  * the uio fields changed by nfsm_uiombuf()
6422                  * back.
6423                  */
6424                 uiop->uio_offset -= len;
6425                 uiop->uio_resid += len;
6426                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - len;
6427                 uiop->uio_iov->iov_len += len;
6428                 error = nd->nd_repstat;
6429         } else {
6430                 if (vers == NFS_VER3) {
6431                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6432                             NULL);
6433                         NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
6434                         if (error != 0)
6435                                 goto nfsmout;
6436                 }
                     /* Reply: bytes written, commitment level, write verifier. */
6437                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6438                 rlen = fxdr_unsigned(int, *tl++);
6439                 NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
6440                 if (rlen == 0) {
6441                         error = NFSERR_IO;
6442                         goto nfsmout;
6443                 } else if (rlen < len) {
                             /* Short write: give the unwritten bytes back to the uio. */
6444                         backup = len - rlen;
6445                         uiop->uio_iov->iov_base =
6446                             (char *)uiop->uio_iov->iov_base - backup;
6447                         uiop->uio_iov->iov_len += backup;
6448                         uiop->uio_offset -= backup;
6449                         uiop->uio_resid += backup;
6450                         len = rlen;
6451                 }
6452                 commit = fxdr_unsigned(int, *tl++);
6453 
6454                 /*
6455                  * Return the lowest commitment level
6456                  * obtained by any of the RPCs.
6457                  */
6458                 if (committed == NFSWRITE_FILESYNC)
6459                         committed = commit;
6460                 else if (committed == NFSWRITE_DATASYNC &&
6461                     commit == NFSWRITE_UNSTABLE)
6462                         committed = commit;
                     /*
                      * Remember the write verifier the first time and flag
                      * *must_commit whenever a later one differs.
                      */
6463                 if (commit_thru_mds != 0) {
6464                         NFSLOCKMNT(nmp);
6465                         if (!NFSHASWRITEVERF(nmp)) {
6466                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6467                                 NFSSETWRITEVERF(nmp);
6468                         } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
6469                                 *must_commit = 1;
6470                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6471                         }
6472                         NFSUNLOCKMNT(nmp);
6473                 } else {
6474                         NFSLOCKDS(dsp);
6475                         if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6476                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6477                                 dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6478                         } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6479                                 *must_commit = 1;
6480                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6481                         }
6482                         NFSUNLOCKDS(dsp);
6483                 }
6484         }
6485 nfsmout:
6486         if (nd->nd_mrep != NULL)
6487                 m_freem(nd->nd_mrep);
6488         *iomode = committed;
6489         if (nd->nd_repstat != 0 && error == 0)
6490                 error = nd->nd_repstat;
6491         return (error);
6492 }
6493
6494 /*
6495  * The actual write RPC done to a DS.
6496  * This variant is called from a separate kernel process for mirrors.
6497  * Any short write is considered an IO error.
      *
      * Unlike nfsrpc_writeds(), the "len" bytes of data are supplied as the
      * mbuf list "m", which is linked onto the end of the request, and the
      * stateid is always put in the request with NFSSTATEID_PUTSTATEID.
      * The write verifier from the reply is saved in "dsp" and *must_commit is
      * set to 1 when it differs from the previously saved one.
      *
      * Once a reply has been received, *iomode is set to the commitment level
      * from the reply, or to NFSWRITE_FILESYNC when that was not parsed.  It is
      * left alone when newnfs_request() fails.
      *
      * Returns 0, the error from newnfs_request() or from parsing the reply,
      * NFSERR_IO for a short write, or the status found in nd_repstat.
6498  */
6499 static int
6500 nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6501     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6502     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6503     struct ucred *cred, NFSPROC_T *p)
6504 {
6505         uint32_t *tl;
6506         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6507         int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
6508         struct nfsrv_descript nfsd;
6509         struct nfsrv_descript *nd = &nfsd;
6510         struct nfssockreq *nrp;
6511         struct nfsvattr na;
6512 
6513         nd->nd_mrep = NULL;
6514         if (vers == 0 || vers == NFS_VER4) {
6515                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6516                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6517                 vers = NFS_VER4;
6518                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
6519                     minorvers);
6520                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6521                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6522         } else {
6523                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6524                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6525                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
6526                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6527         }
             /*
              * Arguments: offset, an extra length word for NFSv3 only, the
              * requested stable-how (*iomode) and the data length.
              */
6528         txdr_hyper(io_off, tl);
6529         tl += 2;
6530         if (vers == NFS_VER3)
6531                 *tl++ = txdr_unsigned(len);
6532         *tl++ = txdr_unsigned(*iomode);
6533         *tl = txdr_unsigned(len);
6534         if (len > 0) {
6535                 /* Put data in mbuf chain. */
6536                 nd->nd_mb->m_next = m;
6537         }
6538         nrp = dsp->nfsclds_sockp;
6539         if (nrp == NULL)
6540                 /* If NULL, use the MDS socket. */
6541                 nrp = &nmp->nm_sockreq;
6542         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6543             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6544         NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
6545             nd->nd_repstat);
6546         if (error != 0)
6547                 return (error);
6548         if (nd->nd_repstat != 0)
6549                 error = nd->nd_repstat;
6550         else {
6551                 if (vers == NFS_VER3) {
6552                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6553                             NULL);
6554                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
6555                             error);
6556                         if (error != 0)
6557                                 goto nfsmout;
6558                 }
                     /* Reply: bytes written, commitment level, write verifier. */
6559                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6560                 rlen = fxdr_unsigned(int, *tl++);
6561                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
6562                     rlen);
                     /* The data is not in a uio, so a partial write cannot be resumed. */
6563                 if (rlen != len) {
6564                         error = NFSERR_IO;
6565                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
6566                             len, rlen);
6567                         goto nfsmout;
6568                 }
6569                 commit = fxdr_unsigned(int, *tl++);
6570 
6571                 /*
6572                  * Return the lowest commitment level
6573                  * obtained by any of the RPCs.
6574                  */
6575                 if (committed == NFSWRITE_FILESYNC)
6576                         committed = commit;
6577                 else if (committed == NFSWRITE_DATASYNC &&
6578                     commit == NFSWRITE_UNSTABLE)
6579                         committed = commit;
                     /*
                      * Remember the write verifier the first time and flag
                      * *must_commit whenever a later one differs.
                      */
6580                 NFSLOCKDS(dsp);
6581                 if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6582                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6583                         dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6584                 } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6585                         *must_commit = 1;
6586                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6587                 }
6588                 NFSUNLOCKDS(dsp);
6589         }
6590 nfsmout:
6591         if (nd->nd_mrep != NULL)
6592                 m_freem(nd->nd_mrep);
6593         *iomode = committed;
6594         if (nd->nd_repstat != 0 && error == 0)
6595                 error = nd->nd_repstat;
6596         return (error);
6597 }
6598
6599 /*
6600  * Start up the thread that will execute nfsrpc_writedsmir().
6601  */
6602 static void
6603 start_writedsmir(void *arg, int pending)
6604 {
6605         struct nfsclwritedsdorpc *drpc;
6606
6607         drpc = (struct nfsclwritedsdorpc *)arg;
6608         drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode,
6609             &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len,
6610             drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred,
6611             drpc->p);
6612         drpc->done = 1;
6613         NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err);
6614 }
6615
6616 /*
6617  * Set up the write DS mirror call for the pNFS I/O thread.
6618  */
6619 static int
6620 nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6621     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len,
6622     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6623     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6624 {
6625         int error, ret;
6626
6627         error = 0;
6628         drpc->done = 0;
6629         drpc->vp = vp;
6630         drpc->iomode = *iomode;
6631         drpc->must_commit = *must_commit;
6632         drpc->stateidp = stateidp;
6633         drpc->dsp = dsp;
6634         drpc->off = off;
6635         drpc->len = len;
6636         drpc->fhp = fhp;
6637         drpc->m = m;
6638         drpc->vers = vers;
6639         drpc->minorvers = minorvers;
6640         drpc->cred = cred;
6641         drpc->p = p;
6642         drpc->inprog = 0;
6643         ret = EIO;
6644         if (nfs_pnfsiothreads != 0) {
6645                 ret = nfs_pnfsio(start_writedsmir, drpc);
6646                 NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret);
6647         }
6648         if (ret != 0)
6649                 error = nfsrpc_writedsmir(vp, iomode, must_commit, stateidp,
6650                     dsp, off, len, fhp, m, vers, minorvers, cred, p);
6651         NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error);
6652         return (error);
6653 }
6654
6655 /*
6656  * Free up the nfsclds structure.
6657  */
6658 void
6659 nfscl_freenfsclds(struct nfsclds *dsp)
6660 {
6661         int i;
6662
6663         if (dsp == NULL)
6664                 return;
6665         if (dsp->nfsclds_sockp != NULL) {
6666                 NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
6667                 NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
6668                 free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
6669                 free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
6670         }
6671         NFSFREEMUTEX(&dsp->nfsclds_mtx);
6672         NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
6673         for (i = 0; i < NFSV4_CBSLOTS; i++) {
6674                 if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
6675                         m_freem(
6676                             dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
6677         }
6678         free(dsp, M_NFSCLDS);
6679 }
6680
6681 static enum nfsclds_state
6682 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
6683     struct nfsclds **retdspp, uint32_t *sequencep)
6684 {
6685         struct nfsclds *dsp;
6686         int fndseq;
6687
6688         /*
6689          * Search the list of nfsclds structures for one with the same
6690          * server.
6691          */
6692         fndseq = 0;
6693         TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
6694                 if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
6695                     dsp->nfsclds_servownlen != 0 &&
6696                     !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
6697                     dsp->nfsclds_servownlen) &&
6698                     dsp->nfsclds_sess.nfsess_defunct == 0) {
6699                         NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
6700                             TAILQ_FIRST(&nmp->nm_sess), dsp,
6701                             dsp->nfsclds_flags);
6702                         if (fndseq == 0) {
6703                                 /* Get sequenceid# from first entry. */
6704                                 *sequencep =
6705                                     dsp->nfsclds_sess.nfsess_sequenceid;
6706                                 fndseq = 1;
6707                         }
6708                         /* Server major id matches. */
6709                         if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
6710                                 *retdspp = dsp;
6711                                 return (NFSDSP_USETHISSESSION);
6712                         }
6713                 }
6714         }
6715         if (fndseq != 0)
6716                 return (NFSDSP_SEQTHISSESSION);
6717         return (NFSDSP_NOTFOUND);
6718 }
6719
6720 /*
6721  * NFS commit rpc to a NFSv4.1 DS.
      *
      * Sends a commit for "cnt" bytes at "offset" of the file handle "fhp",
      * using the session in "dsp".  A "vers" of 0 or NFS_VER4 builds an
      * NFSPROC_COMMITDS request; any other value builds NFSPROC_COMMIT.
      *
      * Returns the error from newnfs_request() if the request fails,
      * NFSERR_STALEWRITEVERF if the verifier in the reply differs from the
      * one saved in "dsp" (the saved one is replaced), otherwise the reply
      * status (0 on success).
6722  */
6723 static int
6724 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
6725     struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
6726     NFSPROC_T *p)
6727 {
6728         uint32_t *tl;
6729         struct nfsrv_descript nfsd, *nd = &nfsd;
6730         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6731         struct nfssockreq *nrp;
6732         struct nfsvattr na;
6733         int attrflag, error;
6734
6735         nd->nd_mrep = NULL;
6736         if (vers == 0 || vers == NFS_VER4) {
6737                 nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,
6738                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
                     /* From here on a "vers" of 0 is handled as NFS_VER4. */
6739                 vers = NFS_VER4;
6740         } else
6741                 nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh,
6742                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6743         NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers,
6744             minorvers);
             /* Arguments: 64bit offset followed by a 32bit count. */
6745         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
6746         txdr_hyper(offset, tl);
6747         tl += 2;
6748         *tl = txdr_unsigned(cnt);
6749         nrp = dsp->nfsclds_sockp;
6750         if (nrp == NULL)
6751                 /* If NULL, use the MDS socket. */
6752                 nrp = &nmp->nm_sockreq;
6753         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6754             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6755         NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error,
6756             nd->nd_repstat);
6757         if (error != 0)
6758                 return (error);
6759         if (nd->nd_repstat == 0) {
6760                 if (vers == NFS_VER3) {
                             /* For NFSv3, wcc data is parsed ahead of the verifier. */
6761                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6762                             NULL);
6763                         NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error);
6764                         if (error != 0)
6765                                 goto nfsmout;
6766                 }
6767                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
                     /*
                      * Compare the returned verifier with the saved one while
                      * holding the DS lock.  A mismatch replaces the saved
                      * verifier and is reported as NFSERR_STALEWRITEVERF.
                      */
6768                 NFSLOCKDS(dsp);
6769                 if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6770                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6771                         error = NFSERR_STALEWRITEVERF;
6772                 }
6773                 NFSUNLOCKDS(dsp);
6774         }
6775 nfsmout:
             /* An error set above takes precedence over the reply status. */
6776         if (error == 0 && nd->nd_repstat != 0)
6777                 error = nd->nd_repstat;
6778         m_freem(nd->nd_mrep);
6779         return (error);
6780 }
6781
6782 /*
6783  * Start up the thread that will execute nfsrpc_commitds().
6784  */
6785 static void
6786 start_commitds(void *arg, int pending)
6787 {
6788         struct nfsclwritedsdorpc *drpc;
6789
6790         drpc = (struct nfsclwritedsdorpc *)arg;
6791         drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len,
6792             drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred,
6793             drpc->p);
6794         drpc->done = 1;
6795         NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err);
6796 }
6797
6798 /*
6799  * Set up the commit DS mirror call for the pNFS I/O thread.
6800  */
6801 static int
6802 nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
6803     struct nfsfh *fhp, int vers, int minorvers,
6804     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6805 {
6806         int error, ret;
6807
6808         error = 0;
6809         drpc->done = 0;
6810         drpc->vp = vp;
6811         drpc->off = offset;
6812         drpc->len = cnt;
6813         drpc->dsp = dsp;
6814         drpc->fhp = fhp;
6815         drpc->vers = vers;
6816         drpc->minorvers = minorvers;
6817         drpc->cred = cred;
6818         drpc->p = p;
6819         drpc->inprog = 0;
6820         ret = EIO;
6821         if (nfs_pnfsiothreads != 0) {
6822                 ret = nfs_pnfsio(start_commitds, drpc);
6823                 NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret);
6824         }
6825         if (ret != 0)
6826                 error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers,
6827                     minorvers, cred, p);
6828         NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error);
6829         return (error);
6830 }
6831
6832 /*
6833  * NFS Advise rpc
6834  */
6835 int
6836 nfsrpc_advise(vnode_t vp, off_t offset, uint64_t cnt, int advise,
6837     struct ucred *cred, NFSPROC_T *p)
6838 {
6839         u_int32_t *tl;
6840         struct nfsrv_descript nfsd, *nd = &nfsd;
6841         nfsattrbit_t hints;
6842         int error;
6843
6844         NFSZERO_ATTRBIT(&hints);
6845         if (advise == POSIX_FADV_WILLNEED)
6846                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
6847         else if (advise == POSIX_FADV_DONTNEED)
6848                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
6849         else
6850                 return (0);
6851         NFSCL_REQSTART(nd, NFSPROC_IOADVISE, vp);
6852         nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
6853         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
6854         txdr_hyper(offset, tl);
6855         tl += 2;
6856         txdr_hyper(cnt, tl);
6857         nfsrv_putattrbit(nd, &hints);
6858         error = nfscl_request(nd, vp, p, cred, NULL);
6859         if (error != 0)
6860                 return (error);
6861         if (nd->nd_repstat != 0)
6862                 error = nd->nd_repstat;
6863         m_freem(nd->nd_mrep);
6864         return (error);
6865 }
6866
6867 #ifdef notyet
6868 /*
6869  * NFS advise rpc to a NFSv4.2 DS.
6870  */
6871 static int
6872 nfsrpc_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
6873     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
6874     struct ucred *cred, NFSPROC_T *p)
6875 {
6876         uint32_t *tl;
6877         struct nfsrv_descript nfsd, *nd = &nfsd;
6878         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6879         struct nfssockreq *nrp;
6880         nfsattrbit_t hints;
6881         int error;
6882
6883         /* For NFS DSs prior to NFSv4.2, just return OK. */
6884         if (vers == NFS_VER3 || minorversion < NFSV42_MINORVERSION)
6885                 return (0);
6886         NFSZERO_ATTRBIT(&hints);
6887         if (advise == POSIX_FADV_WILLNEED)
6888                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
6889         else if (advise == POSIX_FADV_DONTNEED)
6890                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
6891         else
6892                 return (0);
6893         nd->nd_mrep = NULL;
6894         nfscl_reqstart(nd, NFSPROC_IOADVISEDS, nmp, fhp->nfh_fh,
6895             fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6896         vers = NFS_VER4;
6897         NFSCL_DEBUG(4, "nfsrpc_adviseds: vers=%d minvers=%d\n", vers,
6898             minorvers);
6899         nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
6900         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
6901         txdr_hyper(offset, tl);
6902         tl += 2;
6903         *tl = txdr_unsigned(cnt);
6904         nfsrv_putattrbit(nd, &hints);
6905         nrp = dsp->nfsclds_sockp;
6906         if (nrp == NULL)
6907                 /* If NULL, use the MDS socket. */
6908                 nrp = &nmp->nm_sockreq;
6909         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6910             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6911         NFSCL_DEBUG(4, "nfsrpc_adviseds: err=%d stat=%d\n", error,
6912             nd->nd_repstat);
6913         if (error != 0)
6914                 return (error);
6915         if (nd->nd_repstat != 0)
6916                 error = nd->nd_repstat;
6917         m_freem(nd->nd_mrep);
6918         return (error);
6919 }
6920
6921 /*
6922  * Start up the thread that will execute nfsrpc_commitds().
6923  */
6924 static void
6925 start_adviseds(void *arg, int pending)
6926 {
6927         struct nfsclwritedsdorpc *drpc;
6928
6929         drpc = (struct nfsclwritedsdorpc *)arg;
6930         drpc->err = nfsrpc_adviseds(drpc->vp, drpc->off, drpc->len,
6931             drpc->advise, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers,
6932             drpc->cred, drpc->p);
6933         drpc->done = 1;
6934         NFSCL_DEBUG(4, "start_adviseds: err=%d\n", drpc->err);
6935 }
6936
6937 /*
6938  * Set up the commit DS mirror call for the pNFS I/O thread.
6939  */
6940 static int
6941 nfsio_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
6942     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
6943     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6944 {
6945         int error, ret;
6946
6947         error = 0;
6948         drpc->done = 0;
6949         drpc->vp = vp;
6950         drpc->off = offset;
6951         drpc->len = cnt;
6952         drpc->advise = advise;
6953         drpc->dsp = dsp;
6954         drpc->fhp = fhp;
6955         drpc->vers = vers;
6956         drpc->minorvers = minorvers;
6957         drpc->cred = cred;
6958         drpc->p = p;
6959         drpc->inprog = 0;
6960         ret = EIO;
6961         if (nfs_pnfsiothreads != 0) {
6962                 ret = nfs_pnfsio(start_adviseds, drpc);
6963                 NFSCL_DEBUG(4, "nfsio_adviseds: nfs_pnfsio=%d\n", ret);
6964         }
6965         if (ret != 0)
6966                 error = nfsrpc_adviseds(vp, offset, cnt, advise, dsp, fhp, vers,
6967                     minorvers, cred, p);
6968         NFSCL_DEBUG(4, "nfsio_adviseds: error=%d\n", error);
6969         return (error);
6970 }
6971 #endif  /* notyet */
6972
6973 /*
6974  * Do the Allocate operation, retrying for recovery.
      *
      * Returns EINVAL for a negative "len", 0 for a "len" of 0 and EFBIG when
      * off + len is beyond the mount's nm_maxfilesize or wraps around.
      * Otherwise a stateid with write access is acquired and
      * nfsrpc_allocaterpc() is called, repeating for the recoverable errors
      * tested at the bottom of the loop.  An error that is left after four
      * or more attempts is returned as EIO.
6975  */
6976 int
6977 nfsrpc_allocate(vnode_t vp, off_t off, off_t len, struct nfsvattr *nap,
6978     int *attrflagp, struct ucred *cred, NFSPROC_T *p, void *stuff)
6979 {
6980         int error, expireret = 0, retrycnt, nostateid;
6981         uint32_t clidrev = 0;
6982         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6983         struct nfsfh *nfhp = NULL;
6984         nfsv4stateid_t stateid;
6985         off_t tmp_off;
6986         void *lckp;
6987
6988         if (len < 0)
6989                 return (EINVAL);
6990         if (len == 0)
6991                 return (0);
6992         tmp_off = off + len;
             /* The size limit and clientid revision are read with the mount locked. */
6993         NFSLOCKMNT(nmp);
6994         if (tmp_off > nmp->nm_maxfilesize || tmp_off < off) {
6995                 NFSUNLOCKMNT(nmp);
6996                 return (EFBIG);
6997         }
6998         if (nmp->nm_clp != NULL)
6999                 clidrev = nmp->nm_clp->nfsc_clientidrev;
7000         NFSUNLOCKMNT(nmp);
7001         nfhp = VTONFS(vp)->n_fhp;
7002         retrycnt = 0;
7003         do {
7004                 lckp = NULL;
7005                 nostateid = 0;
7006                 nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
7007                     NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
                     /* An all zeros "other" field is treated as no stateid found. */
7008                 if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
7009                     stateid.other[2] == 0) {
7010                         nostateid = 1;
7011                         NFSCL_DEBUG(1, "stateid0 in allocate\n");
7012                 }
7013
7014                 /*
7015                  * Not finding a stateid should probably never happen,
7016                  * but just return an error for this case.
7017                  */
7018                 if (nostateid != 0)
7019                         error = EIO;
7020                 else
7021                         error = nfsrpc_allocaterpc(vp, off, len, &stateid,
7022                             nap, attrflagp, cred, p, stuff);
7023                 if (error == NFSERR_STALESTATEID)
7024                         nfscl_initiate_recovery(nmp->nm_clp);
                     /* Drop the reference nfscl_getstateid() returned in lckp. */
7025                 if (lckp != NULL)
7026                         nfscl_lockderef(lckp);
                     /*
                      * Nap before retrying the errors listed here.  For an
                      * expired or bad stateid, nfscl_hasexpired() is called
                      * instead and its return value decides whether the
                      * loop goes around again.
                      */
7027                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
7028                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
7029                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
7030                         (void) nfs_catnap(PZERO, error, "nfs_allocate");
7031                 } else if ((error == NFSERR_EXPIRED ||
7032                     error == NFSERR_BADSTATEID) && clidrev != 0) {
7033                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
7034                 }
7035                 retrycnt++;
                     /*
                      * NFSERR_OLDSTATEID is retried fewer than 20 times and
                      * NFSERR_EXPIRED/NFSERR_BADSTATEID fewer than 4 times;
                      * the other errors below have no retry limit.
                      */
7036         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
7037             error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
7038             error == NFSERR_STALEDONTRECOVER ||
7039             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
7040             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
7041              expireret == 0 && clidrev != 0 && retrycnt < 4));
7042         if (error != 0 && retrycnt >= 4)
7043                 error = EIO;
7044         return (error);
7045 }
7046
7047 /*
7048  * The allocate RPC.
      *
      * Builds an NFSPROC_ALLOCATE request holding the stateid, "off" and
      * "len", followed by a Getattr for the NFSGETATTR_ATTRBIT() attributes.
      * On success the attributes are loaded into "nap" and *attrflagp is set
      * to NFS_LATTR_NOSHRINK; in every other case *attrflagp is left at 0.
      * Returns the error from nfscl_request(), the reply status or the error
      * from parsing the reply.
7049  */
7050 static int
7051 nfsrpc_allocaterpc(vnode_t vp, off_t off, off_t len, nfsv4stateid_t *stateidp,
7052     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p,
7053     void *stuff)
7054 {
7055         uint32_t *tl;
7056         int error;
7057         struct nfsrv_descript nfsd;
7058         struct nfsrv_descript *nd = &nfsd;
7059         nfsattrbit_t attrbits;
7060
7061         *attrflagp = 0;
7062         NFSCL_REQSTART(nd, NFSPROC_ALLOCATE, vp);
7063         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
             /* Two 64bit values (offset, length) and then the Getattr op#. */
7064         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
7065         txdr_hyper(off, tl); tl += 2;
7066         txdr_hyper(len, tl); tl += 2;
7067         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7068         NFSGETATTR_ATTRBIT(&attrbits);
7069         nfsrv_putattrbit(nd, &attrbits);
7070         error = nfscl_request(nd, vp, p, cred, stuff);
7071         if (error != 0)
7072                 return (error);
7073         if (nd->nd_repstat == 0) {
                     /*
                      * Step over two 32bit words ahead of the attributes;
                      * presumably the Getattr op# and its status - confirm.
                      */
7074                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7075                 error = nfsm_loadattr(nd, nap);
7076                 if (error == 0)
7077                         *attrflagp = NFS_LATTR_NOSHRINK;
7078         } else
7079                 error = nd->nd_repstat;
7080 nfsmout:
7081         m_freem(nd->nd_mrep);
7082         return (error);
7083 }
7084
7085 /*
7086  * Set up the XDR arguments for the LayoutGet operation.
7087  */
7088 static void
7089 nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
7090     uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype,
7091     int layoutlen, int usecurstateid)
7092 {
7093         uint32_t *tl;
7094
7095         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
7096             NFSX_STATEID);
7097         *tl++ = newnfs_false;           /* Don't signal availability. */
7098         *tl++ = txdr_unsigned(layouttype);
7099         *tl++ = txdr_unsigned(iomode);
7100         txdr_hyper(offset, tl);
7101         tl += 2;
7102         txdr_hyper(len, tl);
7103         tl += 2;
7104         txdr_hyper(minlen, tl);
7105         tl += 2;
7106         if (usecurstateid != 0) {
7107                 /* Special stateid for Current stateid. */
7108                 *tl++ = txdr_unsigned(1);
7109                 *tl++ = 0;
7110                 *tl++ = 0;
7111                 *tl++ = 0;
7112         } else {
7113                 *tl++ = txdr_unsigned(stateidp->seqid);
7114                 NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
7115                 *tl++ = stateidp->other[0];
7116                 *tl++ = stateidp->other[1];
7117                 *tl++ = stateidp->other[2];
7118         }
7119         *tl = txdr_unsigned(layoutlen);
7120 }
7121
7122 /*
7123  * Parse the reply for a successful LayoutGet operation.
7124  */
7125 static int
7126 nfsrv_parselayoutget(struct nfsmount *nmp, struct nfsrv_descript *nd,
7127     nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp)
7128 {
7129         uint32_t *tl;
7130         struct nfsclflayout *flp, *prevflp, *tflp;
7131         int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen;
7132         int m, mirrorcnt;
7133         uint64_t retlen, off;
7134         struct nfsfh *nfhp;
7135         uint8_t *cp;
7136         uid_t user;
7137         gid_t grp;
7138
7139         NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n");
7140         error = 0;
7141         flp = NULL;
7142         gotiomode = -1;
7143         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
7144         if (*tl++ != 0)
7145                 *retonclosep = 1;
7146         else
7147                 *retonclosep = 0;
7148         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
7149         NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
7150             (int)stateidp->seqid);
7151         stateidp->other[0] = *tl++;
7152         stateidp->other[1] = *tl++;
7153         stateidp->other[2] = *tl++;
7154         cnt = fxdr_unsigned(int, *tl);
7155         NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
7156         if (cnt <= 0 || cnt > 10000) {
7157                 /* Don't accept more than 10000 layouts in reply. */
7158                 error = NFSERR_BADXDR;
7159                 goto nfsmout;
7160         }
7161         for (i = 0; i < cnt; i++) {
7162                 /* Dissect to the layout type. */
7163                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER +
7164                     3 * NFSX_UNSIGNED);
7165                 off = fxdr_hyper(tl); tl += 2;
7166                 retlen = fxdr_hyper(tl); tl += 2;
7167                 iomode = fxdr_unsigned(int, *tl++);
7168                 laytype = fxdr_unsigned(int, *tl);
7169                 NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype,
7170                     (uintmax_t)off, (uintmax_t)retlen, iomode);
7171                 /* Ignore length of layout body for now. */
7172                 if (laytype == NFSLAYOUT_NFSV4_1_FILES) {
7173                         /* Parse the File layout up to fhcnt. */
7174                         NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED +
7175                             NFSX_HYPER + NFSX_V4DEVICEID);
7176                         fhcnt = fxdr_unsigned(int, *(tl + 4 +
7177                             NFSX_V4DEVICEID / NFSX_UNSIGNED));
7178                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7179                         if (fhcnt < 0 || fhcnt > 100) {
7180                                 /* Don't accept more than 100 file handles. */
7181                                 error = NFSERR_BADXDR;
7182                                 goto nfsmout;
7183                         }
7184                         if (fhcnt > 0)
7185                                 flp = malloc(sizeof(*flp) + fhcnt *
7186                                     sizeof(struct nfsfh *), M_NFSFLAYOUT,
7187                                     M_WAITOK);
7188                         else
7189                                 flp = malloc(sizeof(*flp), M_NFSFLAYOUT,
7190                                     M_WAITOK);
7191                         flp->nfsfl_flags = NFSFL_FILE;
7192                         flp->nfsfl_fhcnt = 0;
7193                         flp->nfsfl_devp = NULL;
7194                         flp->nfsfl_off = off;
7195                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7196                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7197                         else
7198                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
7199                         flp->nfsfl_iomode = iomode;
7200                         if (gotiomode == -1)
7201                                 gotiomode = flp->nfsfl_iomode;
7202                         /* Ignore layout body length for now. */
7203                         NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
7204                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7205                         flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
7206                         NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
7207                         mtx_lock(&nmp->nm_mtx);
7208                         if (nmp->nm_minorvers > 1 && (flp->nfsfl_util &
7209                             NFSFLAYUTIL_IOADVISE_THRU_MDS) != 0)
7210                                 nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7211                         mtx_unlock(&nmp->nm_mtx);
7212                         flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
7213                         flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
7214                         NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n",
7215                             flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff);
7216                         for (j = 0; j < fhcnt; j++) {
7217                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7218                                 nfhlen = fxdr_unsigned(int, *tl);
7219                                 if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
7220                                         error = NFSERR_BADXDR;
7221                                         goto nfsmout;
7222                                 }
7223                                 nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
7224                                     M_NFSFH, M_WAITOK);
7225                                 flp->nfsfl_fh[j] = nfhp;
7226                                 flp->nfsfl_fhcnt++;
7227                                 nfhp->nfh_len = nfhlen;
7228                                 NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
7229                                 NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
7230                         }
7231                 } else if (laytype == NFSLAYOUT_FLEXFILE) {
7232                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED +
7233                             NFSX_HYPER);
7234                         mirrorcnt = fxdr_unsigned(int, *(tl + 2));
7235                         NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt);
7236                         if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) {
7237                                 error = NFSERR_BADXDR;
7238                                 goto nfsmout;
7239                         }
7240                         flp = malloc(sizeof(*flp) + mirrorcnt *
7241                             sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK);
7242                         flp->nfsfl_flags = NFSFL_FLEXFILE;
7243                         flp->nfsfl_mirrorcnt = mirrorcnt;
7244                         for (j = 0; j < mirrorcnt; j++)
7245                                 flp->nfsfl_ffm[j].devp = NULL;
7246                         flp->nfsfl_off = off;
7247                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7248                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7249                         else
7250                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
7251                         flp->nfsfl_iomode = iomode;
7252                         if (gotiomode == -1)
7253                                 gotiomode = flp->nfsfl_iomode;
7254                         flp->nfsfl_stripeunit = fxdr_hyper(tl);
7255                         NFSCL_DEBUG(4, "stripeunit=%ju\n",
7256                             (uintmax_t)flp->nfsfl_stripeunit);
7257                         for (j = 0; j < mirrorcnt; j++) {
7258                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7259                                 k = fxdr_unsigned(int, *tl);
7260                                 if (k < 1 || k > 128) {
7261                                         error = NFSERR_BADXDR;
7262                                         goto nfsmout;
7263                                 }
7264                                 NFSCL_DEBUG(4, "servercnt=%d\n", k);
7265                                 for (l = 0; l < k; l++) {
7266                                         NFSM_DISSECT(tl, uint32_t *,
7267                                             NFSX_V4DEVICEID + NFSX_STATEID +
7268                                             2 * NFSX_UNSIGNED);
7269                                         if (l == 0) {
7270                                                 /* Just use the first server. */
7271                                                 NFSBCOPY(tl,
7272                                                     flp->nfsfl_ffm[j].dev,
7273                                                     NFSX_V4DEVICEID);
7274                                                 tl += (NFSX_V4DEVICEID /
7275                                                     NFSX_UNSIGNED);
7276                                                 tl++;
7277                                                 flp->nfsfl_ffm[j].st.seqid =
7278                                                     *tl++;
7279                                                 flp->nfsfl_ffm[j].st.other[0] =
7280                                                     *tl++;
7281                                                 flp->nfsfl_ffm[j].st.other[1] =
7282                                                     *tl++;
7283                                                 flp->nfsfl_ffm[j].st.other[2] =
7284                                                     *tl++;
7285                                                 NFSCL_DEBUG(4, "st.seqid=%u "
7286                                                  "st.o0=0x%x st.o1=0x%x "
7287                                                  "st.o2=0x%x\n",
7288                                                  flp->nfsfl_ffm[j].st.seqid,
7289                                                  flp->nfsfl_ffm[j].st.other[0],
7290                                                  flp->nfsfl_ffm[j].st.other[1],
7291                                                  flp->nfsfl_ffm[j].st.other[2]);
7292                                         } else
7293                                                 tl += ((NFSX_V4DEVICEID +
7294                                                     NFSX_STATEID +
7295                                                     NFSX_UNSIGNED) /
7296                                                     NFSX_UNSIGNED);
7297                                         fhcnt = fxdr_unsigned(int, *tl);
7298                                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7299                                         if (fhcnt < 1 ||
7300                                             fhcnt > NFSDEV_MAXVERS) {
7301                                                 error = NFSERR_BADXDR;
7302                                                 goto nfsmout;
7303                                         }
7304                                         for (m = 0; m < fhcnt; m++) {
7305                                                 NFSM_DISSECT(tl, uint32_t *,
7306                                                     NFSX_UNSIGNED);
7307                                                 nfhlen = fxdr_unsigned(int,
7308                                                     *tl);
7309                                                 NFSCL_DEBUG(4, "nfhlen=%d\n",
7310                                                     nfhlen);
7311                                                 if (nfhlen <= 0 || nfhlen >
7312                                                     NFSX_V4FHMAX) {
7313                                                         error = NFSERR_BADXDR;
7314                                                         goto nfsmout;
7315                                                 }
7316                                                 NFSM_DISSECT(cp, uint8_t *,
7317                                                     NFSM_RNDUP(nfhlen));
7318                                                 if (l == 0) {
7319                                                         flp->nfsfl_ffm[j].fhcnt 
7320                                                             = fhcnt;
7321                                                         nfhp = malloc(
7322                                                             sizeof(*nfhp) +
7323                                                             nfhlen - 1, M_NFSFH,
7324                                                             M_WAITOK);
7325                                                         flp->nfsfl_ffm[j].fh[m]
7326                                                             = nfhp;
7327                                                         nfhp->nfh_len = nfhlen;
7328                                                         NFSBCOPY(cp,
7329                                                             nfhp->nfh_fh,
7330                                                             nfhlen);
7331                                                         NFSCL_DEBUG(4,
7332                                                             "got fh\n");
7333                                                 }
7334                                         }
7335                                         /* Now, get the ffds_user/ffds_group. */
7336                                         error = nfsrv_parseug(nd, 0, &user,
7337                                             &grp, curthread);
7338                                         NFSCL_DEBUG(4, "after parseu=%d\n",
7339                                             error);
7340                                         if (error == 0)
7341                                                 error = nfsrv_parseug(nd, 1,
7342                                                     &user, &grp, curthread);
7343                                         NFSCL_DEBUG(4, "aft parseg=%d\n",
7344                                             grp);
7345                                         if (error != 0)
7346                                                 goto nfsmout;
7347                                         NFSCL_DEBUG(4, "user=%d group=%d\n",
7348                                             user, grp);
7349                                         if (l == 0) {
7350                                                 flp->nfsfl_ffm[j].user = user;
7351                                                 flp->nfsfl_ffm[j].group = grp;
7352                                                 NFSCL_DEBUG(4,
7353                                                     "usr=%d grp=%d\n", user,
7354                                                     grp);
7355                                         }
7356                                 }
7357                         }
7358                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7359                         flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++);
7360 #ifdef notnow
7361                         /*
7362                          * At this time, there is no flag.
7363                          * NFSFLEXFLAG_IOADVISE_THRU_MDS might need to be
7364                          * added, or it may never exist?
7365                          */
7366                         mtx_lock(&nmp->nm_mtx);
7367                         if (nmp->nm_minorvers > 1 && (flp->nfsfl_fflags &
7368                             NFSFLEXFLAG_IOADVISE_THRU_MDS) != 0)
7369                                 nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7370                         mtx_unlock(&nmp->nm_mtx);
7371 #endif
7372                         flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl);
7373                         NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n",
7374                             flp->nfsfl_fflags, flp->nfsfl_statshint);
7375                 } else {
7376                         error = NFSERR_BADXDR;
7377                         goto nfsmout;
7378                 }
7379                 if (flp->nfsfl_iomode == gotiomode) {
7380                         /* Keep the list in increasing offset order. */
7381                         tflp = LIST_FIRST(flhp);
7382                         prevflp = NULL;
7383                         while (tflp != NULL &&
7384                             tflp->nfsfl_off < flp->nfsfl_off) {
7385                                 prevflp = tflp;
7386                                 tflp = LIST_NEXT(tflp, nfsfl_list);
7387                         }
7388                         if (prevflp == NULL)
7389                                 LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
7390                         else
7391                                 LIST_INSERT_AFTER(prevflp, flp,
7392                                     nfsfl_list);
7393                         NFSCL_DEBUG(4, "flp inserted\n");
7394                 } else {
7395                         printf("nfscl_layoutget(): got wrong iomode\n");
7396                         nfscl_freeflayout(flp);
7397                 }
7398                 flp = NULL;
7399         }
7400 nfsmout:
7401         NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error);
7402         if (error != 0 && flp != NULL)
7403                 nfscl_freeflayout(flp);
7404         return (error);
7405 }
7406
7407 /*
7408  * Parse a user/group digit string.
7409  */
7410 static int
7411 nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
7412     NFSPROC_T *p)
7413 {
7414         uint32_t *tl;
7415         char *cp, *str, str0[NFSV4_SMALLSTR + 1];
7416         uint32_t len = 0;
7417         int error = 0;
7418
7419         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7420         len = fxdr_unsigned(uint32_t, *tl);
7421         str = NULL;
7422         if (len > NFSV4_OPAQUELIMIT) {
7423                 error = NFSERR_BADXDR;
7424                 goto nfsmout;
7425         }
7426         NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len);
7427         if (len == 0) {
7428                 if (dogrp != 0)
7429                         *gidp = GID_NOGROUP;
7430                 else
7431                         *uidp = UID_NOBODY;
7432                 return (0);
7433         }
7434         if (len > NFSV4_SMALLSTR)
7435                 str = malloc(len + 1, M_TEMP, M_WAITOK);
7436         else
7437                 str = str0;
7438         NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
7439         NFSBCOPY(cp, str, len);
7440         str[len] = '\0';
7441         NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
7442         if (dogrp != 0)
7443                 error = nfsv4_strtogid(nd, str, len, gidp);
7444         else
7445                 error = nfsv4_strtouid(nd, str, len, uidp);
7446 nfsmout:
7447         if (len > NFSV4_SMALLSTR)
7448                 free(str, M_TEMP);
7449         NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error);
7450         return (error);
7451 }
7452
7453 /*
7454  * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
7455  * so that it does both an Open and a Layoutget.
7456  */
7457 static int
7458 nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7459     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7460     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7461     struct ucred *cred, NFSPROC_T *p)
7462 {
7463         struct nfscllayout *lyp;
7464         struct nfsclflayout *flp;
7465         struct nfsclflayouthead flh;
7466         int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
7467         int layouttype, laystat;
7468         nfsv4stateid_t stateid;
7469         struct nfsclsession *tsep;
7470
7471         error = 0;
7472         if (NFSHASFLEXFILE(nmp))
7473                 layouttype = NFSLAYOUT_FLEXFILE;
7474         else
7475                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
7476         /*
7477          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
7478          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
7479          * flp == NULL.
7480          */
7481         lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp,
7482             &recalled);
7483         NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
7484         if (lyp == NULL)
7485                 islocked = 0;
7486         else if (flp != NULL)
7487                 islocked = 1;
7488         else
7489                 islocked = 2;
7490         if ((lyp == NULL || flp == NULL) && recalled == 0) {
7491                 LIST_INIT(&flh);
7492                 tsep = nfsmnt_mdssession(nmp);
7493                 layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
7494                     3 * NFSX_UNSIGNED);
7495                 if (lyp == NULL)
7496                         usecurstateid = 1;
7497                 else {
7498                         usecurstateid = 0;
7499                         stateid.seqid = lyp->nfsly_stateid.seqid;
7500                         stateid.other[0] = lyp->nfsly_stateid.other[0];
7501                         stateid.other[1] = lyp->nfsly_stateid.other[1];
7502                         stateid.other[2] = lyp->nfsly_stateid.other[2];
7503                 }
7504                 error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
7505                     newfhp, newfhlen, mode, op, name, namelen,
7506                     dpp, &stateid, usecurstateid, layouttype, layoutlen,
7507                     &retonclose, &flh, &laystat, cred, p);
7508                 NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
7509                     laystat, error);
7510                 laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
7511                     &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat,
7512                     &islocked, cred, p);
7513         } else
7514                 error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
7515                     mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
7516         if (islocked == 2)
7517                 nfscl_rellayout(lyp, 1);
7518         else if (islocked == 1)
7519                 nfscl_rellayout(lyp, 0);
7520         return (error);
7521 }
7522
7523 /*
7524  * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
7525  * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
7526  * handled by nfsrpc_openrpc().
7527  * For the case where op == NULL, dvp is the directory.  When op != NULL, it
7528  * can be NULL.
7529  */
7530 static int
7531 nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7532     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7533     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7534     nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
7535     int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
7536     int *laystatp, struct ucred *cred, NFSPROC_T *p)
7537 {
7538         uint32_t *tl;
7539         struct nfsrv_descript nfsd, *nd = &nfsd;
7540         struct nfscldeleg *ndp = NULL;
7541         struct nfsvattr nfsva;
7542         struct nfsclsession *tsep;
7543         uint32_t rflags, deleg;
7544         nfsattrbit_t attrbits;
7545         int error, ret, acesize, limitby, iomode;
7546
7547         *dpp = NULL;
7548         *laystatp = ENXIO;
7549         nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
7550             0, 0);
7551         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
7552         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
7553         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
7554         *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
7555         tsep = nfsmnt_mdssession(nmp);
7556         *tl++ = tsep->nfsess_clientid.lval[0];
7557         *tl = tsep->nfsess_clientid.lval[1];
7558         nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7559         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7560         *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
7561         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7562         nfsm_strtom(nd, name, namelen);
7563         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7564         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7565         NFSZERO_ATTRBIT(&attrbits);
7566         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
7567         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
7568         nfsrv_putattrbit(nd, &attrbits);
7569         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7570         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7571         if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
7572                 iomode = NFSLAYOUTIOMODE_RW;
7573         else
7574                 iomode = NFSLAYOUTIOMODE_READ;
7575         nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
7576             layouttype, layoutlen, usecurstateid);
7577         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
7578             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
7579         if (error != 0)
7580                 return (error);
7581         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
7582         if (nd->nd_repstat != 0)
7583                 *laystatp = nd->nd_repstat;
7584         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7585                 /* ND_NOMOREDATA will be set if the Open operation failed. */
7586                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7587                     6 * NFSX_UNSIGNED);
7588                 op->nfso_stateid.seqid = *tl++;
7589                 op->nfso_stateid.other[0] = *tl++;
7590                 op->nfso_stateid.other[1] = *tl++;
7591                 op->nfso_stateid.other[2] = *tl;
7592                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
7593                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7594                 if (error != 0)
7595                         goto nfsmout;
7596                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7597                 deleg = fxdr_unsigned(u_int32_t, *tl);
7598                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
7599                     deleg == NFSV4OPEN_DELEGATEWRITE) {
7600                         if (!(op->nfso_own->nfsow_clp->nfsc_flags &
7601                               NFSCLFLAGS_FIRSTDELEG))
7602                                 op->nfso_own->nfsow_clp->nfsc_flags |=
7603                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
7604                         ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
7605                             M_NFSCLDELEG, M_WAITOK);
7606                         LIST_INIT(&ndp->nfsdl_owner);
7607                         LIST_INIT(&ndp->nfsdl_lock);
7608                         ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
7609                         ndp->nfsdl_fhlen = newfhlen;
7610                         NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
7611                         newnfs_copyincred(cred, &ndp->nfsdl_cred);
7612                         nfscl_lockinit(&ndp->nfsdl_rwlock);
7613                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7614                             NFSX_UNSIGNED);
7615                         ndp->nfsdl_stateid.seqid = *tl++;
7616                         ndp->nfsdl_stateid.other[0] = *tl++;
7617                         ndp->nfsdl_stateid.other[1] = *tl++;
7618                         ndp->nfsdl_stateid.other[2] = *tl++;
7619                         ret = fxdr_unsigned(int, *tl);
7620                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
7621                                 ndp->nfsdl_flags = NFSCLDL_WRITE;
7622                                 /*
7623                                  * Indicates how much the file can grow.
7624                                  */
7625                                 NFSM_DISSECT(tl, u_int32_t *,
7626                                     3 * NFSX_UNSIGNED);
7627                                 limitby = fxdr_unsigned(int, *tl++);
7628                                 switch (limitby) {
7629                                 case NFSV4OPEN_LIMITSIZE:
7630                                         ndp->nfsdl_sizelimit = fxdr_hyper(tl);
7631                                         break;
7632                                 case NFSV4OPEN_LIMITBLOCKS:
7633                                         ndp->nfsdl_sizelimit =
7634                                             fxdr_unsigned(u_int64_t, *tl++);
7635                                         ndp->nfsdl_sizelimit *=
7636                                             fxdr_unsigned(u_int64_t, *tl);
7637                                         break;
7638                                 default:
7639                                         error = NFSERR_BADXDR;
7640                                         goto nfsmout;
7641                                 };
7642                         } else
7643                                 ndp->nfsdl_flags = NFSCLDL_READ;
7644                         if (ret != 0)
7645                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
7646                         error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
7647                             &acesize, p);
7648                         if (error != 0)
7649                                 goto nfsmout;
7650                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
7651                         error = NFSERR_BADXDR;
7652                         goto nfsmout;
7653                 }
7654                 if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
7655                     nfscl_assumeposixlocks)
7656                         op->nfso_posixlock = 1;
7657                 else
7658                         op->nfso_posixlock = 0;
7659                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7660                 /* If the 2nd element == NFS_OK, the Getattr succeeded. */
7661                 if (*++tl == 0) {
7662                         error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
7663                             NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
7664                             NULL, NULL, NULL, p, cred);
7665                         if (error != 0)
7666                                 goto nfsmout;
7667                         if (ndp != NULL) {
7668                                 ndp->nfsdl_change = nfsva.na_filerev;
7669                                 ndp->nfsdl_modtime = nfsva.na_mtime;
7670                                 ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7671                                 *dpp = ndp;
7672                                 ndp = NULL;
7673                         }
7674                         /*
7675                          * At this point, the Open has succeeded, so set
7676                          * nd_repstat = NFS_OK.  If the Layoutget failed,
7677                          * this function just won't return a layout.
7678                          */
7679                         if (nd->nd_repstat == 0) {
7680                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7681                                 *laystatp = fxdr_unsigned(int, *++tl);
7682                                 if (*laystatp == 0) {
7683                                         error = nfsrv_parselayoutget(nmp, nd,
7684                                             stateidp, retonclosep, flhp);
7685                                         if (error != 0)
7686                                                 *laystatp = error;
7687                                 }
7688                         } else
7689                                 nd->nd_repstat = 0;     /* Return 0 for Open. */
7690                 }
7691         }
7692         if (nd->nd_repstat != 0 && error == 0)
7693                 error = nd->nd_repstat;
7694 nfsmout:
7695         free(ndp, M_NFSCLDELEG);
7696         m_freem(nd->nd_mrep);
7697         return (error);
7698 }
7699
7700 /*
7701  * Similar to nfsrpc_createv4(), but also does the LayoutGet operation.
7702  * Used only for mounts with pNFS enabled.
7703  */
7704 static int
7705 nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7706     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7707     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7708     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7709     int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp,
7710     int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
7711     struct nfsclflayouthead *flhp, int *laystatp)
7712 {
7713         uint32_t *tl;
7714         int error = 0, deleg, newone, ret, acesize, limitby;
7715         struct nfsrv_descript nfsd, *nd = &nfsd;
7716         struct nfsclopen *op;
7717         struct nfscldeleg *dp = NULL;
7718         struct nfsnode *np;
7719         struct nfsfh *nfhp;
7720         struct nfsclsession *tsep;
7721         nfsattrbit_t attrbits;
7722         nfsv4stateid_t stateid;
7723         struct nfsmount *nmp;
7724
7725         nmp = VFSTONFS(dvp->v_mount);
7726         np = VTONFS(dvp);
7727         *laystatp = ENXIO;
7728         *unlockedp = 0;
7729         *nfhpp = NULL;
7730         *dpp = NULL;
7731         *attrflagp = 0;
7732         *dattrflagp = 0;
7733         if (namelen > NFS_MAXNAMLEN)
7734                 return (ENAMETOOLONG);
7735         NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp);
7736         /*
7737          * For V4, this is actually an Open op.
7738          */
7739         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
7740         *tl++ = txdr_unsigned(owp->nfsow_seqid);
7741         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
7742             NFSV4OPEN_ACCESSREAD);
7743         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
7744         tsep = nfsmnt_mdssession(nmp);
7745         *tl++ = tsep->nfsess_clientid.lval[0];
7746         *tl = tsep->nfsess_clientid.lval[1];
7747         nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7748         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7749         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
7750         if ((fmode & O_EXCL) != 0) {
7751                 if (NFSHASSESSPERSIST(nmp)) {
7752                         /* Use GUARDED for persistent sessions. */
7753                         *tl = txdr_unsigned(NFSCREATE_GUARDED);
7754                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
7755                 } else {
7756                         /* Otherwise, use EXCLUSIVE4_1. */
7757                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
7758                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
7759                         *tl++ = cverf.lval[0];
7760                         *tl = cverf.lval[1];
7761                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
7762                 }
7763         } else {
7764                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
7765                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
7766         }
7767         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7768         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7769         nfsm_strtom(nd, name, namelen);
7770         /* Get the new file's handle and attributes, plus save the FH. */
7771         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
7772         *tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
7773         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
7774         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7775         NFSGETATTR_ATTRBIT(&attrbits);
7776         nfsrv_putattrbit(nd, &attrbits);
7777         /* Get the directory's post-op attributes. */
7778         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7779         *tl = txdr_unsigned(NFSV4OP_PUTFH);
7780         nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
7781         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7782         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7783         nfsrv_putattrbit(nd, &attrbits);
7784         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7785         *tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
7786         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7787         nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
7788             layouttype, layoutlen, usecurstateid);
7789         error = nfscl_request(nd, dvp, p, cred, dstuff);
7790         if (error != 0)
7791                 return (error);
7792         NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
7793             error);
7794         if (nd->nd_repstat != 0)
7795                 *laystatp = nd->nd_repstat;
7796         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
7797         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7798                 NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
7799                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7800                     6 * NFSX_UNSIGNED);
7801                 stateid.seqid = *tl++;
7802                 stateid.other[0] = *tl++;
7803                 stateid.other[1] = *tl++;
7804                 stateid.other[2] = *tl;
7805                 nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7806                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7807                 deleg = fxdr_unsigned(int, *tl);
7808                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
7809                     deleg == NFSV4OPEN_DELEGATEWRITE) {
7810                         if (!(owp->nfsow_clp->nfsc_flags &
7811                               NFSCLFLAGS_FIRSTDELEG))
7812                                 owp->nfsow_clp->nfsc_flags |=
7813                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
7814                         dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
7815                             M_NFSCLDELEG, M_WAITOK);
7816                         LIST_INIT(&dp->nfsdl_owner);
7817                         LIST_INIT(&dp->nfsdl_lock);
7818                         dp->nfsdl_clp = owp->nfsow_clp;
7819                         newnfs_copyincred(cred, &dp->nfsdl_cred);
7820                         nfscl_lockinit(&dp->nfsdl_rwlock);
7821                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7822                             NFSX_UNSIGNED);
7823                         dp->nfsdl_stateid.seqid = *tl++;
7824                         dp->nfsdl_stateid.other[0] = *tl++;
7825                         dp->nfsdl_stateid.other[1] = *tl++;
7826                         dp->nfsdl_stateid.other[2] = *tl++;
7827                         ret = fxdr_unsigned(int, *tl);
7828                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
7829                                 dp->nfsdl_flags = NFSCLDL_WRITE;
7830                                 /*
7831                                  * Indicates how much the file can grow.
7832                                  */
7833                                 NFSM_DISSECT(tl, u_int32_t *,
7834                                     3 * NFSX_UNSIGNED);
7835                                 limitby = fxdr_unsigned(int, *tl++);
7836                                 switch (limitby) {
7837                                 case NFSV4OPEN_LIMITSIZE:
7838                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
7839                                         break;
7840                                 case NFSV4OPEN_LIMITBLOCKS:
7841                                         dp->nfsdl_sizelimit =
7842                                             fxdr_unsigned(u_int64_t, *tl++);
7843                                         dp->nfsdl_sizelimit *=
7844                                             fxdr_unsigned(u_int64_t, *tl);
7845                                         break;
7846                                 default:
7847                                         error = NFSERR_BADXDR;
7848                                         goto nfsmout;
7849                                 };
7850                         } else {
7851                                 dp->nfsdl_flags = NFSCLDL_READ;
7852                         }
7853                         if (ret != 0)
7854                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
7855                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
7856                             &acesize, p);
7857                         if (error != 0)
7858                                 goto nfsmout;
7859                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
7860                         error = NFSERR_BADXDR;
7861                         goto nfsmout;
7862                 }
7863
7864                 /* Now, we should have the status for the SaveFH. */
7865                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7866                 if (*++tl == 0) {
7867                         NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
7868                         /*
7869                          * Now, process the GetFH and Getattr for the newly
7870                          * created file. nfscl_mtofh() will set
7871                          * ND_NOMOREDATA if these weren't successful.
7872                          */
7873                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
7874                         NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
7875                         if (error != 0)
7876                                 goto nfsmout;
7877                 } else
7878                         nd->nd_flag |= ND_NOMOREDATA;
7879                 /* Now we have the PutFH and Getattr for the directory. */
7880                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7881                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7882                         if (*++tl != 0)
7883                                 nd->nd_flag |= ND_NOMOREDATA;
7884                         else {
7885                                 NFSM_DISSECT(tl, uint32_t *, 2 *
7886                                     NFSX_UNSIGNED);
7887                                 if (*++tl != 0)
7888                                         nd->nd_flag |= ND_NOMOREDATA;
7889                         }
7890                 }
7891                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7892                         /* Load the directory attributes. */
7893                         error = nfsm_loadattr(nd, dnap);
7894                         NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
7895                         if (error != 0)
7896                                 goto nfsmout;
7897                         *dattrflagp = 1;
7898                         if (dp != NULL && *attrflagp != 0) {
7899                                 dp->nfsdl_change = nnap->na_filerev;
7900                                 dp->nfsdl_modtime = nnap->na_mtime;
7901                                 dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7902                         }
7903                         /*
7904                          * We can now complete the Open state.
7905                          */
7906                         nfhp = *nfhpp;
7907                         if (dp != NULL) {
7908                                 dp->nfsdl_fhlen = nfhp->nfh_len;
7909                                 NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
7910                                     nfhp->nfh_len);
7911                         }
7912                         /*
7913                          * Get an Open structure that will be
7914                          * attached to the OpenOwner, acquired already.
7915                          */
7916                         error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
7917                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
7918                             cred, p, NULL, &op, &newone, NULL, 0);
7919                         if (error != 0)
7920                                 goto nfsmout;
7921                         op->nfso_stateid = stateid;
7922                         newnfs_copyincred(cred, &op->nfso_cred);
7923
7924                         nfscl_openrelease(nmp, op, error, newone);
7925                         *unlockedp = 1;
7926
7927                         /* Now, handle the RestoreFH and LayoutGet. */
7928                         if (nd->nd_repstat == 0) {
7929                                 NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
7930                                 *laystatp = fxdr_unsigned(int, *(tl + 3));
7931                                 if (*laystatp == 0) {
7932                                         error = nfsrv_parselayoutget(nmp, nd,
7933                                             stateidp, retonclosep, flhp);
7934                                         if (error != 0)
7935                                                 *laystatp = error;
7936                                 }
7937                                 NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
7938                                     error);
7939                         } else
7940                                 nd->nd_repstat = 0;
7941                 }
7942         }
7943         if (nd->nd_repstat != 0 && error == 0)
7944                 error = nd->nd_repstat;
7945         if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION)
7946                 nfscl_initiate_recovery(owp->nfsow_clp);
7947 nfsmout:
7948         NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
7949         if (error == 0)
7950                 *dpp = dp;
7951         else
7952                 free(dp, M_NFSCLDELEG);
7953         m_freem(nd->nd_mrep);
7954         return (error);
7955 }
7956
7957 /*
7958  * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
7959  */
7960 static int
7961 nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7962     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7963     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7964     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7965     int *dattrflagp, void *dstuff, int *unlockedp)
7966 {
7967         struct nfscllayout *lyp;
7968         struct nfsclflayouthead flh;
7969         struct nfsfh *nfhp;
7970         struct nfsclsession *tsep;
7971         struct nfsmount *nmp;
7972         nfsv4stateid_t stateid;
7973         int error, layoutlen, layouttype, retonclose, laystat;
7974
7975         error = 0;
7976         nmp = VFSTONFS(dvp->v_mount);
7977         if (NFSHASFLEXFILE(nmp))
7978                 layouttype = NFSLAYOUT_FLEXFILE;
7979         else
7980                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
7981         LIST_INIT(&flh);
7982         tsep = nfsmnt_mdssession(nmp);
7983         layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
7984         error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
7985             owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
7986             dstuff, unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
7987             &flh, &laystat);
7988         NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
7989             laystat, error);
7990         lyp = NULL;
7991         if (laystat == 0) {
7992                 nfhp = *nfhpp;
7993                 laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
7994                     nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
7995                     layouttype, laystat, NULL, cred, p);
7996         } else
7997                 laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
7998                     retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL,
7999                     cred, p);
8000         if (laystat == 0)
8001                 nfscl_rellayout(lyp, 0);
8002         return (error);
8003 }
8004
8005 /*
8006  * Process the results of a layoutget() operation.
8007  */
8008 static int
8009 nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
8010     int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
8011     struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
8012     int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
8013 {
8014         struct nfsclflayout *tflp;
8015         struct nfscldevinfo *dip;
8016         uint8_t *dev;
8017         int i, mirrorcnt;
8018
8019         if (laystat == NFSERR_UNKNLAYOUTTYPE) {
8020                 NFSLOCKMNT(nmp);
8021                 if (!NFSHASFLEXFILE(nmp)) {
8022                         /* Switch to using Flex File Layout. */
8023                         nmp->nm_state |= NFSSTA_FLEXFILE;
8024                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
8025                         /* Disable pNFS. */
8026                         NFSCL_DEBUG(1, "disable PNFS\n");
8027                         nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
8028                 }
8029                 NFSUNLOCKMNT(nmp);
8030         }
8031         if (laystat == 0) {
8032                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
8033                 LIST_FOREACH(tflp, flhp, nfsfl_list) {
8034                         if (layouttype == NFSLAYOUT_FLEXFILE)
8035                                 mirrorcnt = tflp->nfsfl_mirrorcnt;
8036                         else
8037                                 mirrorcnt = 1;
8038                         for (i = 0; i < mirrorcnt; i++) {
8039                                 laystat = nfscl_adddevinfo(nmp, NULL, i, tflp);
8040                                 NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
8041                                 if (laystat != 0) {
8042                                         if (layouttype == NFSLAYOUT_FLEXFILE)
8043                                                 dev = tflp->nfsfl_ffm[i].dev;
8044                                         else
8045                                                 dev = tflp->nfsfl_dev;
8046                                         laystat = nfsrpc_getdeviceinfo(nmp, dev,
8047                                             layouttype, notifybit, &dip, cred,
8048                                             p);
8049                                         NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
8050                                             laystat);
8051                                         if (laystat != 0)
8052                                                 goto out;
8053                                         laystat = nfscl_adddevinfo(nmp, dip, i,
8054                                             tflp);
8055                                         if (laystat != 0)
8056                                                 printf("nfsrpc_layoutgetresout"
8057                                                     ": cannot add\n");
8058                                 }
8059                         }
8060                 }
8061         }
8062 out:
8063         if (laystat == 0) {
8064                 /*
8065                  * nfscl_layout() always returns with the nfsly_lock
8066                  * set to a refcnt (shared lock).
8067                  * Passing in dvp is sufficient, since it is only used to
8068                  * get the fsid for the file system.
8069                  */
8070                 laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
8071                     layouttype, retonclose, flhp, lypp, cred, p);
8072                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
8073                     laystat);
8074                 if (laystat == 0 && islockedp != NULL)
8075                         *islockedp = 1;
8076         }
8077         return (laystat);
8078 }
8079
/*
 * nfs copy_file_range operation.
 * Wrapper around nfsrpc_copyrpc() that acquires stateids for both files and
 * retries the RPC for the recoverable NFSv4 errors listed in the loop below.
 *
 * invp/outvp		source and destination vnodes
 * inoffp/outoffp	file offsets; both are advanced by *lenp on success
 * lenp			passed through to nfsrpc_copyrpc()
 * flags		not referenced by this function
 * inattrflagp/innap and outattrflagp/outnap
 *			passed through to nfsrpc_copyrpc()
 * consecutive		passed through to nfsrpc_copyrpc()
 * must_commitp		set to true when the reply's commit level is not
 *			NFSWRITE_FILESYNC; never cleared here
 *
 * Returns 0 or an error.  Some failures are mapped to EIO (see the end of
 * the function).
 */
int
nfsrpc_copy_file_range(vnode_t invp, off_t *inoffp, vnode_t outvp,
    off_t *outoffp, size_t *lenp, unsigned int flags, int *inattrflagp,
    struct nfsvattr *innap, int *outattrflagp, struct nfsvattr *outnap,
    struct ucred *cred, bool consecutive, bool *must_commitp)
{
        int commit, error, expireret = 0, retrycnt;
        u_int32_t clidrev = 0;
        struct nfsmount *nmp = VFSTONFS(invp->v_mount);
        struct nfsfh *innfhp = NULL, *outnfhp = NULL;
        nfsv4stateid_t instateid, outstateid;
        void *inlckp, *outlckp;

        /* clidrev stays 0 when the mount has no client structure. */
        if (nmp->nm_clp != NULL)
                clidrev = nmp->nm_clp->nfsc_clientidrev;
        innfhp = VTONFS(invp)->n_fhp;
        outnfhp = VTONFS(outvp)->n_fhp;
        retrycnt = 0;
        do {
                /*
                 * Get both stateids (read access for the input file, write
                 * access for the output file).  They are re-acquired on
                 * every iteration of the retry loop.
                 */
                inlckp = NULL;
                nfscl_getstateid(invp, innfhp->nfh_fh, innfhp->nfh_len,
                    NFSV4OPEN_ACCESSREAD, 0, NULL, curthread, &instateid,
                    &inlckp);
                outlckp = NULL;
                nfscl_getstateid(outvp, outnfhp->nfh_fh, outnfhp->nfh_len,
                    NFSV4OPEN_ACCESSWRITE, 0, NULL, curthread, &outstateid,
                    &outlckp);

                error = nfsrpc_copyrpc(invp, *inoffp, outvp, *outoffp, lenp,
                    &instateid, &outstateid, innap, inattrflagp, outnap,
                    outattrflagp, consecutive, &commit, cred, curthread);
                if (error == 0) {
                        /* Anything short of FILESYNC needs a later commit. */
                        if (commit != NFSWRITE_FILESYNC)
                                *must_commitp = true;
                        *inoffp += *lenp;
                        *outoffp += *lenp;
                } else if (error == NFSERR_STALESTATEID)
                        nfscl_initiate_recovery(nmp->nm_clp);
                /* Drop whatever nfscl_getstateid() handed back via *lckp. */
                if (inlckp != NULL)
                        nfscl_lockderef(inlckp);
                if (outlckp != NULL)
                        nfscl_lockderef(outlckp);
                /*
                 * Sleep in nfs_catnap() before retrying these errors.  For
                 * an expired or bad stateid, ask nfscl_hasexpired() instead;
                 * the loop only retries those while it returns 0.
                 */
                if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
                    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
                    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
                        (void) nfs_catnap(PZERO, error, "nfs_cfr");
                } else if ((error == NFSERR_EXPIRED ||
                    error == NFSERR_BADSTATEID) && clidrev != 0) {
                        expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
                            curthread);
                }
                retrycnt++;
        /*
         * Retry without limit for GRACE, DELAY, STALESTATEID, BADSESSION
         * and STALEDONTRECOVER, up to 20 tries for OLDSTATEID and up to
         * 4 tries for EXPIRED/BADSTATEID.
         */
        } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
            error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
              error == NFSERR_STALEDONTRECOVER ||
            (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
            ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
             expireret == 0 && clidrev != 0 && retrycnt < 4));
        /*
         * Any error left after 4 or more tries is reported as EIO.
         * NOTE(review): the three specific errors tested here also keep the
         * loop above running, so those tests look unreachable at this
         * point -- confirm before relying on them.
         */
        if (error != 0 && (retrycnt >= 4 ||
            error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
              error == NFSERR_STALEDONTRECOVER))
                error = EIO;
        return (error);
}
8148
/*
 * The copy RPC.
 * Builds and sends one Copy request and parses the reply.
 *
 * invp, inoff		source vnode and offset
 * outvp, outoff	destination vnode and offset
 * lenp			in: bytes requested (clamped to nfs_maxcopyrange);
 *			out: 0, or the count from the reply when the reply
 *			was parsed and nd_repstat is 0
 * instateidp/outstateidp
 *			stateids put in the Copy arguments
 * innap/inattrflagp	input file attributes; flag set to 1 when loaded
 * outnap/outattrflagp	output file attributes; flag set to
 *			NFS_LATTR_NOSHRINK when loaded
 * consecutive		value of the "consecutive" argument sent
 * commitp		commit level from the reply; NFSWRITE_UNSTABLE until
 *			a reply has been parsed
 *
 * Returns an error from nfscl_request() or from reply parsing, else
 * nd_repstat.
 */
static int
nfsrpc_copyrpc(vnode_t invp, off_t inoff, vnode_t outvp, off_t outoff,
    size_t *lenp, nfsv4stateid_t *instateidp, nfsv4stateid_t *outstateidp,
    struct nfsvattr *innap, int *inattrflagp, struct nfsvattr *outnap,
    int *outattrflagp, bool consecutive, int *commitp, struct ucred *cred,
    NFSPROC_T *p)
{
        uint32_t *tl;
        int error;
        struct nfsrv_descript nfsd;
        struct nfsrv_descript *nd = &nfsd;
        struct nfsmount *nmp;
        nfsattrbit_t attrbits;
        uint64_t len;

        nmp = VFSTONFS(outvp->v_mount);
        /* Set the outputs to "nothing done" before the request is built. */
        *inattrflagp = *outattrflagp = 0;
        *commitp = NFSWRITE_UNSTABLE;
        len = *lenp;
        *lenp = 0;
        if (len > nfs_maxcopyrange)
                len = nfs_maxcopyrange;
        /*
         * Build the request.  After NFSCL_REQSTART() for invp, the ops
         * added here are: Getattr, PutFH(outvp), Copy, Getattr.
         */
        NFSCL_REQSTART(nd, NFSPROC_COPY, invp);
        NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
        *tl = txdr_unsigned(NFSV4OP_GETATTR);
        NFSGETATTR_ATTRBIT(&attrbits);
        nfsrv_putattrbit(nd, &attrbits);
        NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
        *tl = txdr_unsigned(NFSV4OP_PUTFH);
        nfsm_fhtom(nd, VTONFS(outvp)->n_fhp->nfh_fh,
            VTONFS(outvp)->n_fhp->nfh_len, 0);
        NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
        *tl = txdr_unsigned(NFSV4OP_COPY);
        nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID);
        nfsm_stateidtom(nd, outstateidp, NFSSTATEID_PUTSTATEID);
        /* Three hypers (offsets and length) followed by four words. */
        NFSM_BUILD(tl, uint32_t *, 3 * NFSX_HYPER + 4 * NFSX_UNSIGNED);
        txdr_hyper(inoff, tl); tl += 2;
        txdr_hyper(outoff, tl); tl += 2;
        txdr_hyper(len, tl); tl += 2;
        if (consecutive)
                *tl++ = newnfs_true;
        else
                *tl++ = newnfs_false;
        /*
         * Presumably the "synchronous" flag and then a zero count of source
         * servers -- TODO confirm against the Copy argument layout.
         */
        *tl++ = newnfs_true;
        *tl++ = 0;
        *tl = txdr_unsigned(NFSV4OP_GETATTR);
        NFSWRITEGETATTR_ATTRBIT(&attrbits);
        nfsrv_putattrbit(nd, &attrbits);
        error = nfscl_request(nd, invp, p, cred, NULL);
        if (error != 0)
                return (error);
        /*
         * Parse the reply.  NFSM_DISSECT() presumably sets error and jumps
         * to nfsmout when the reply is too short -- confirm in its
         * definition.
         */
        if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
                /* Get the input file's attributes. */
                NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
                /* The second word is the status of the first Getattr. */
                if (*(tl + 1) == 0) {
                        error = nfsm_loadattr(nd, innap);
                        if (error != 0)
                                goto nfsmout;
                        *inattrflagp = 1;
                } else
                        nd->nd_flag |= ND_NOMOREDATA;
        }
        /* Skip over return stat for PutFH. */
        if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
                NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
                if (*++tl != 0)
                        nd->nd_flag |= ND_NOMOREDATA;
        }
        /* Skip over return stat for Copy. */
        if ((nd->nd_flag & ND_NOMOREDATA) == 0)
                NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
        if (nd->nd_repstat == 0) {
                NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
                if (*tl != 0) {
                        /* There should be no callback ids. */
                        error = NFSERR_BADXDR;
                        goto nfsmout;
                }
                /*
                 * Byte count (hyper), commit level, write verifier and two
                 * more words.
                 */
                NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED +
                    NFSX_VERF);
                len = fxdr_hyper(tl); tl += 2;
                *commitp = fxdr_unsigned(int, *tl++);
                /*
                 * Record the write verifier for the mount, or flag
                 * NFSERR_STALEWRITEVERF when it differs from the saved one.
                 */
                NFSLOCKMNT(nmp);
                if (!NFSHASWRITEVERF(nmp)) {
                        NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
                        NFSSETWRITEVERF(nmp);
                } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
                        NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
                        nd->nd_repstat = NFSERR_STALEWRITEVERF;
                }
                NFSUNLOCKMNT(nmp);
                tl += (NFSX_VERF / NFSX_UNSIGNED);
                /*
                 * tl is now at the first word after the verifier; the
                 * pre-increment steps over it (presumably the "consecutive"
                 * result) so that the word tested is the one after it.
                 */
                if (nd->nd_repstat == 0 && *++tl != newnfs_true)
                        /* Must be a synchronous copy. */
                        nd->nd_repstat = NFSERR_NOTSUPP;
                /* Skip the op and status words of the final Getattr. */
                NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
                error = nfsm_loadattr(nd, outnap);
                if (error == 0)
                        *outattrflagp = NFS_LATTR_NOSHRINK;
                /* The count is only reported when nothing was flagged above. */
                if (nd->nd_repstat == 0)
                        *lenp = len;
        } else if (nd->nd_repstat == NFSERR_OFFLOADNOREQS) {
                /*
                 * For the case where consecutive is not supported, but
                 * synchronous is supported, we can try consecutive == false
                 * by returning this error.  Otherwise, return NFSERR_NOTSUPP,
                 * since Copy cannot be done.
                 */
                if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
                        NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
                        /* The second word is tested (pre-increment). */
                        if (!consecutive || *++tl == newnfs_false)
                                nd->nd_repstat = NFSERR_NOTSUPP;
                } else
                        nd->nd_repstat = NFSERR_BADXDR;
        }
        /* A parse error takes precedence over the reply status. */
        if (error == 0)
                error = nd->nd_repstat;
nfsmout:
        m_freem(nd->nd_mrep);
        return (error);
}
8273
8274 /*
8275  * Seek operation.
8276  */
8277 int
8278 nfsrpc_seek(vnode_t vp, off_t *offp, bool *eofp, int content,
8279     struct ucred *cred, struct nfsvattr *nap, int *attrflagp)
8280 {
8281         int error, expireret = 0, retrycnt;
8282         u_int32_t clidrev = 0;
8283         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
8284         struct nfsnode *np = VTONFS(vp);
8285         struct nfsfh *nfhp = NULL;
8286         nfsv4stateid_t stateid;
8287         void *lckp;
8288
8289         if (nmp->nm_clp != NULL)
8290                 clidrev = nmp->nm_clp->nfsc_clientidrev;
8291         nfhp = np->n_fhp;
8292         retrycnt = 0;
8293         do {
8294                 lckp = NULL;
8295                 nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
8296                     NFSV4OPEN_ACCESSREAD, 0, cred, curthread, &stateid, &lckp);
8297                 error = nfsrpc_seekrpc(vp, offp, &stateid, eofp, content,
8298                     nap, attrflagp, cred);
8299                 if (error == NFSERR_STALESTATEID)
8300                         nfscl_initiate_recovery(nmp->nm_clp);
8301                 if (lckp != NULL)
8302                         nfscl_lockderef(lckp);
8303                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8304                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8305                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8306                         (void) nfs_catnap(PZERO, error, "nfs_seek");
8307                 } else if ((error == NFSERR_EXPIRED ||
8308                     error == NFSERR_BADSTATEID) && clidrev != 0) {
8309                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8310                             curthread);
8311                 }
8312                 retrycnt++;
8313         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8314             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8315             error == NFSERR_BADSESSION ||
8316             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8317             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8318              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
8319             (error == NFSERR_OPENMODE && retrycnt < 4));
8320         if (error && retrycnt >= 4)
8321                 error = EIO;
8322         return (error);
8323 }
8324
8325 /*
8326  * The seek RPC.
8327  */
8328 static int
8329 nfsrpc_seekrpc(vnode_t vp, off_t *offp, nfsv4stateid_t *stateidp, bool *eofp,
8330     int content, struct nfsvattr *nap, int *attrflagp, struct ucred *cred)
8331 {
8332         uint32_t *tl;
8333         int error;
8334         struct nfsrv_descript nfsd;
8335         struct nfsrv_descript *nd = &nfsd;
8336         nfsattrbit_t attrbits;
8337
8338         *attrflagp = 0;
8339         NFSCL_REQSTART(nd, NFSPROC_SEEK, vp);
8340         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
8341         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8342         txdr_hyper(*offp, tl); tl += 2;
8343         *tl++ = txdr_unsigned(content);
8344         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8345         NFSGETATTR_ATTRBIT(&attrbits);
8346         nfsrv_putattrbit(nd, &attrbits);
8347         error = nfscl_request(nd, vp, curthread, cred, NULL);
8348         if (error != 0)
8349                 return (error);
8350         if (nd->nd_repstat == 0) {
8351                 NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER);
8352                 if (*tl++ == newnfs_true)
8353                         *eofp = true;
8354                 else
8355                         *eofp = false;
8356                 *offp = fxdr_hyper(tl);
8357                 /* Just skip over Getattr op status. */
8358                 error = nfsm_loadattr(nd, nap);
8359                 if (error == 0)
8360                         *attrflagp = 1;
8361         }
8362         error = nd->nd_repstat;
8363 nfsmout:
8364         m_freem(nd->nd_mrep);
8365         return (error);
8366 }
8367
8368 /*
8369  * The getextattr RPC.
8370  */
8371 int
8372 nfsrpc_getextattr(vnode_t vp, const char *name, struct uio *uiop, ssize_t *lenp,
8373     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8374 {
8375         uint32_t *tl;
8376         int error;
8377         struct nfsrv_descript nfsd;
8378         struct nfsrv_descript *nd = &nfsd;
8379         nfsattrbit_t attrbits;
8380         uint32_t len, len2;
8381
8382         *attrflagp = 0;
8383         NFSCL_REQSTART(nd, NFSPROC_GETEXTATTR, vp);
8384         nfsm_strtom(nd, name, strlen(name));
8385         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8386         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8387         NFSGETATTR_ATTRBIT(&attrbits);
8388         nfsrv_putattrbit(nd, &attrbits);
8389         error = nfscl_request(nd, vp, p, cred, NULL);
8390         if (error != 0)
8391                 return (error);
8392         if (nd->nd_repstat == 0) {
8393                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8394                 len = fxdr_unsigned(uint32_t, *tl);
8395                 /* Sanity check lengths. */
8396                 if (uiop != NULL && len > 0 && len <= IOSIZE_MAX &&
8397                     uiop->uio_resid <= UINT32_MAX) {
8398                         len2 = uiop->uio_resid;
8399                         if (len2 >= len)
8400                                 error = nfsm_mbufuio(nd, uiop, len);
8401                         else {
8402                                 error = nfsm_mbufuio(nd, uiop, len2);
8403                                 if (error == 0) {
8404                                         /*
8405                                          * nfsm_mbufuio() advances to a multiple
8406                                          * of 4, so round up len2 as well.  Then
8407                                          * we need to advance over the rest of
8408                                          * the data, rounding up the remaining
8409                                          * length.
8410                                          */
8411                                         len2 = NFSM_RNDUP(len2);
8412                                         len2 = NFSM_RNDUP(len - len2);
8413                                         if (len2 > 0)
8414                                                 error = nfsm_advance(nd, len2,
8415                                                     -1);
8416                                 }
8417                         }
8418                 } else if (uiop == NULL && len > 0) {
8419                         /* Just wants the length and not the data. */
8420                         error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8421                 } else if (len > 0)
8422                         error = ENOATTR;
8423                 if (error != 0)
8424                         goto nfsmout;
8425                 *lenp = len;
8426                 /* Just skip over Getattr op status. */
8427                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8428                 error = nfsm_loadattr(nd, nap);
8429                 if (error == 0)
8430                         *attrflagp = 1;
8431         }
8432         if (error == 0)
8433                 error = nd->nd_repstat;
8434 nfsmout:
8435         m_freem(nd->nd_mrep);
8436         return (error);
8437 }
8438
8439 /*
8440  * The setextattr RPC.
8441  */
8442 int
8443 nfsrpc_setextattr(vnode_t vp, const char *name, struct uio *uiop,
8444     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8445 {
8446         uint32_t *tl;
8447         int error;
8448         struct nfsrv_descript nfsd;
8449         struct nfsrv_descript *nd = &nfsd;
8450         nfsattrbit_t attrbits;
8451
8452         *attrflagp = 0;
8453         NFSCL_REQSTART(nd, NFSPROC_SETEXTATTR, vp);
8454         if (uiop->uio_resid > nd->nd_maxreq) {
8455                 /* nd_maxreq is set by NFSCL_REQSTART(). */
8456                 m_freem(nd->nd_mreq);
8457                 return (EINVAL);
8458         }
8459         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8460         *tl = txdr_unsigned(NFSV4SXATTR_EITHER);
8461         nfsm_strtom(nd, name, strlen(name));
8462         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8463         *tl = txdr_unsigned(uiop->uio_resid);
8464         nfsm_uiombuf(nd, uiop, uiop->uio_resid);
8465         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8466         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8467         NFSGETATTR_ATTRBIT(&attrbits);
8468         nfsrv_putattrbit(nd, &attrbits);
8469         error = nfscl_request(nd, vp, p, cred, NULL);
8470         if (error != 0)
8471                 return (error);
8472         if (nd->nd_repstat == 0) {
8473                 /* Just skip over the reply and Getattr op status. */
8474                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8475                     NFSX_UNSIGNED);
8476                 error = nfsm_loadattr(nd, nap);
8477                 if (error == 0)
8478                         *attrflagp = 1;
8479         }
8480         if (error == 0)
8481                 error = nd->nd_repstat;
8482 nfsmout:
8483         m_freem(nd->nd_mrep);
8484         return (error);
8485 }
8486
8487 /*
8488  * The removeextattr RPC.
8489  */
8490 int
8491 nfsrpc_rmextattr(vnode_t vp, const char *name, struct nfsvattr *nap,
8492     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8493 {
8494         uint32_t *tl;
8495         int error;
8496         struct nfsrv_descript nfsd;
8497         struct nfsrv_descript *nd = &nfsd;
8498         nfsattrbit_t attrbits;
8499
8500         *attrflagp = 0;
8501         NFSCL_REQSTART(nd, NFSPROC_RMEXTATTR, vp);
8502         nfsm_strtom(nd, name, strlen(name));
8503         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8504         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8505         NFSGETATTR_ATTRBIT(&attrbits);
8506         nfsrv_putattrbit(nd, &attrbits);
8507         error = nfscl_request(nd, vp, p, cred, NULL);
8508         if (error != 0)
8509                 return (error);
8510         if (nd->nd_repstat == 0) {
8511                 /* Just skip over the reply and Getattr op status. */
8512                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8513                     NFSX_UNSIGNED);
8514                 error = nfsm_loadattr(nd, nap);
8515                 if (error == 0)
8516                         *attrflagp = 1;
8517         }
8518         if (error == 0)
8519                 error = nd->nd_repstat;
8520 nfsmout:
8521         m_freem(nd->nd_mrep);
8522         return (error);
8523 }
8524
8525 /*
8526  * The listextattr RPC.
8527  */
8528 int
8529 nfsrpc_listextattr(vnode_t vp, uint64_t *cookiep, struct uio *uiop,
8530     size_t *lenp, bool *eofp, struct nfsvattr *nap, int *attrflagp,
8531     struct ucred *cred, NFSPROC_T *p)
8532 {
8533         uint32_t *tl;
8534         int cnt, error, i, len;
8535         struct nfsrv_descript nfsd;
8536         struct nfsrv_descript *nd = &nfsd;
8537         nfsattrbit_t attrbits;
8538         u_char c;
8539
8540         *attrflagp = 0;
8541         NFSCL_REQSTART(nd, NFSPROC_LISTEXTATTR, vp);
8542         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8543         txdr_hyper(*cookiep, tl); tl += 2;
8544         *tl++ = txdr_unsigned(*lenp);
8545         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8546         NFSGETATTR_ATTRBIT(&attrbits);
8547         nfsrv_putattrbit(nd, &attrbits);
8548         error = nfscl_request(nd, vp, p, cred, NULL);
8549         if (error != 0)
8550                 return (error);
8551         *eofp = true;
8552         *lenp = 0;
8553         if (nd->nd_repstat == 0) {
8554                 NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
8555                 *cookiep = fxdr_hyper(tl); tl += 2;
8556                 cnt = fxdr_unsigned(int, *tl);
8557                 if (cnt < 0) {
8558                         error = EBADRPC;
8559                         goto nfsmout;
8560                 }
8561                 for (i = 0; i < cnt; i++) {
8562                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8563                         len = fxdr_unsigned(int, *tl);
8564                         if (len <= 0 || len > EXTATTR_MAXNAMELEN) {
8565                                 error = EBADRPC;
8566                                 goto nfsmout;
8567                         }
8568                         if (uiop == NULL)
8569                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8570                         else if (uiop->uio_resid >= len + 1) {
8571                                 c = len;
8572                                 error = uiomove(&c, sizeof(c), uiop);
8573                                 if (error == 0)
8574                                         error = nfsm_mbufuio(nd, uiop, len);
8575                         } else {
8576                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8577                                 *eofp = false;
8578                         }
8579                         if (error != 0)
8580                                 goto nfsmout;
8581                         *lenp += (len + 1);
8582                 }
8583                 /* Get the eof and skip over the Getattr op status. */
8584                 NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
8585                 /*
8586                  * *eofp is set false above, because it wasn't able to copy
8587                  * all of the reply.
8588                  */
8589                 if (*eofp && *tl == 0)
8590                         *eofp = false;
8591                 error = nfsm_loadattr(nd, nap);
8592                 if (error == 0)
8593                         *attrflagp = 1;
8594         }
8595         if (error == 0)
8596                 error = nd->nd_repstat;
8597 nfsmout:
8598         m_freem(nd->nd_mrep);
8599         return (error);
8600 }
8601
8602 /*
8603  * Split an mbuf list.  For non-M_EXTPG mbufs, just use m_split().
8604  */
8605 static struct mbuf *
8606 nfsm_split(struct mbuf *mp, uint64_t xfer)
8607 {
8608         struct mbuf *m, *m2;
8609         vm_page_t pg;
8610         int i, j, left, pgno, plen, trim;
8611         char *cp, *cp2;
8612
8613         if ((mp->m_flags & M_EXTPG) == 0) {
8614                 m = m_split(mp, xfer, M_WAITOK);
8615                 return (m);
8616         }
8617
8618         /* Find the correct mbuf to split at. */
8619         for (m = mp; m != NULL && xfer > m->m_len; m = m->m_next)
8620                 xfer -= m->m_len;
8621         if (m == NULL)
8622                 return (NULL);
8623
8624         /* If xfer == m->m_len, we can just split the mbuf list. */
8625         if (xfer == m->m_len) {
8626                 m2 = m->m_next;
8627                 m->m_next = NULL;
8628                 return (m2);
8629         }
8630
8631         /* Find the page to split at. */
8632         pgno = 0;
8633         left = xfer;
8634         do {
8635                 if (pgno == 0)
8636                         plen = m_epg_pagelen(m, 0, m->m_epg_1st_off);
8637                 else
8638                         plen = m_epg_pagelen(m, pgno, 0);
8639                 if (left <= plen)
8640                         break;
8641                 left -= plen;
8642                 pgno++;
8643         } while (pgno < m->m_epg_npgs);
8644         if (pgno == m->m_epg_npgs)
8645                 panic("nfsm_split: eroneous ext_pgs mbuf");
8646
8647         m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs);
8648         m2->m_epg_flags |= EPG_FLAG_ANON;
8649
8650         /*
8651          * If left < plen, allocate a new page for the new mbuf
8652          * and copy the data after left in the page to this new
8653          * page.
8654          */
8655         if (left < plen) {
8656                 do {
8657                         pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
8658                             VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP |
8659                             VM_ALLOC_WIRED);
8660                         if (pg == NULL)
8661                                 vm_wait(NULL);
8662                 } while (pg == NULL);
8663                 m2->m_epg_pa[0] = VM_PAGE_TO_PHYS(pg);
8664                 m2->m_epg_npgs = 1;
8665
8666                 /* Copy the data after left to the new page. */
8667                 trim = plen - left;
8668                 cp = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
8669                 if (pgno == 0)
8670                         cp += m->m_epg_1st_off;
8671                 cp += left;
8672                 cp2 = (char *)(void *)PHYS_TO_DMAP(m2->m_epg_pa[0]);
8673                 if (pgno == m->m_epg_npgs - 1)
8674                         m2->m_epg_last_len = trim;
8675                 else {
8676                         cp2 += PAGE_SIZE - trim;
8677                         m2->m_epg_1st_off = PAGE_SIZE - trim;
8678                         m2->m_epg_last_len = m->m_epg_last_len;
8679                 }
8680                 memcpy(cp2, cp, trim);
8681                 m2->m_len = trim;
8682         } else {
8683                 m2->m_len = 0;
8684                 m2->m_epg_last_len = m->m_epg_last_len;
8685         }
8686
8687         /* Move the pages beyond pgno to the new mbuf. */
8688         for (i = pgno + 1, j = m2->m_epg_npgs; i < m->m_epg_npgs; i++, j++) {
8689                 m2->m_epg_pa[j] = m->m_epg_pa[i];
8690                 /* Never moves page 0. */
8691                 m2->m_len += m_epg_pagelen(m, i, 0);
8692         }
8693         m2->m_epg_npgs = j;
8694         m->m_epg_npgs = pgno + 1;
8695         m->m_epg_last_len = left;
8696         m->m_len = xfer;
8697
8698         m2->m_next = m->m_next;
8699         m->m_next = NULL;
8700         return (m2);
8701 }