]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/nfsclient/nfs_clrpcops.c
THIS BRANCH IS OBSOLETE, PLEASE READ:
[FreeBSD/FreeBSD.git] / sys / fs / nfsclient / nfs_clrpcops.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 /*
40  * Rpc op calls, generally called from the vnode op calls or through the
41  * buffer cache, for NFS v2, 3 and 4.
42  * These do not normally make any changes to vnode arguments or use
43  * structures that might change between the VFS variants. The returned
44  * arguments are all at the end, after the NFSPROC_T *p one.
45  */
46
47 #include "opt_inet6.h"
48
49 #include <fs/nfs/nfsport.h>
50 #include <fs/nfsclient/nfs.h>
51 #include <sys/extattr.h>
52 #include <sys/sysctl.h>
53 #include <sys/taskqueue.h>
54
55 SYSCTL_DECL(_vfs_nfs);
56
/*
 * Read/write integer sysctl "ignore_eexist", attached to the _vfs_nfs node
 * and initially 0.  Per its description string it controls whether EEXIST
 * replies for mkdir/symlink are ignored; the code that consults the flag is
 * not adjacent to this declaration.
 */
57 static int      nfsignore_eexist = 0;
58 SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
59     &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
60
/*
 * Read/write integer sysctl "dssameconn", attached to the _vfs_nfs node and
 * initially 0.  Per its description string it selects use of the same TCP
 * connection to multiple DSs (presumably pNFS data servers -- confirm).
 */
61 static int      nfscl_dssameconn = 0;
62 SYSCTL_INT(_vfs_nfs, OID_AUTO, dssameconn, CTLFLAG_RW,
63     &nfscl_dssameconn, 0, "Use same TCP connection to multiple DSs");
64
65 /*
66  * Global variables
 *
 * The "extern" lines only declare objects that are defined in other files.
 * The lines without "extern" that follow them define globals owned by this
 * file, each with an explicit initial value.
67  */
68 extern int nfs_numnfscbd;
69 extern struct timeval nfsboottime;
70 extern u_int32_t newnfs_false, newnfs_true;
71 extern nfstype nfsv34_type[9];
72 extern int nfsrv_useacl;
73 extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
74 extern int nfscl_debuglevel;
75 extern int nfs_pnfsiothreads;
76 extern u_long sb_max_adj;
77 extern int nfs_maxcopyrange;
/* NOTE(review): macro from a header; presumably references the NFS client
 * state mutex -- confirm against its definition. */
78 NFSCLSTATEMUTEX;
79 int nfstest_outofseq = 0;
/* When non-zero, nfsrpc_openrpc() sets nfso_posixlock to 1 for every open,
 * whether or not the server's reply has NFSV4OPEN_LOCKTYPEPOSIX set. */
80 int nfscl_assumeposixlocks = 1;
/* While this is 0, nfsrpc_open() always uses the plain Open RPC and never
 * tries nfsrpc_getopenlayout(). */
81 int nfscl_enablecallb = 0;
82 short nfsv4_cbport = NFSV4_CBPORT;
83 int nfstest_openallsetattr = 0;
84
/* Byte offset of d_name within struct dirent, i.e. the size of the part of
 * a directory entry that precedes the name. */
85 #define DIRHDSIZ        offsetof(struct dirent, d_name)
86
87 /*
88  * nfscl_getsameserver() can return one of three values:
89  * NFSDSP_USETHISSESSION - Use this session for the DS.
90  * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new
91  *     session.
92  * NFSDSP_NOTFOUND - No matching server was found.
 *
 * The enumerators are given explicit values 0, 1 and 2 below.
93  */
94 enum nfsclds_state {
95         NFSDSP_USETHISSESSION = 0,
96         NFSDSP_SEQTHISSESSION = 1,
97         NFSDSP_NOTFOUND = 2,
98 };
99
100 /*
101  * Do a write RPC on a DS data file, using this structure for the arguments,
102  * so that this function can be executed by a separate kernel process.
 *
 * Besides the write helpers (nfscl_dofflayoutio(), nfsio_writedsmir()), the
 * nfsio_commitds() prototype in this file also takes a pointer to this
 * structure, as does nfsio_adviseds() when "notyet" is defined, so it is
 * not used for writes only.
 *
 * NOTE(review): "done", "inprog" and "err" look like completion/in-progress
 * flags and the result of the queued RPC, and "tsk" like the taskqueue entry
 * used to run it -- confirm against the code that fills in the structure.
 * The "advise" member only exists when "notyet" is defined.
103  */
104 struct nfsclwritedsdorpc {
105         int                     done;
106         int                     inprog;
107         struct task             tsk;
108         struct vnode            *vp;
109         int                     iomode;
110         int                     must_commit;
111         nfsv4stateid_t          *stateidp;
112         struct nfsclds          *dsp;
113         uint64_t                off;
114         int                     len;
115 #ifdef notyet
116         int                     advise;
117 #endif
118         struct nfsfh            *fhp;
119         struct mbuf             *m;
120         int                     vers;
121         int                     minorvers;
122         struct ucred            *cred;
123         NFSPROC_T               *p;
124         int                     err;
125 };
126
/*
 * Prototypes for the file-local (static) helper functions, followed by the
 * single non-static prototype, nfs_pnfsio().
 * The nfsio_adviseds()/nfsrpc_adviseds() pair is only declared when
 * "notyet" is defined.
 */
127 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
128     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
129 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
130     nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *);
131 static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
132     struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
133     void *);
134 static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
135     nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
136     struct nfsvattr *, struct nfsfh **, int *, int *, void *);
137 static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
138     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
139     NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
140     int *, void *, int *);
141 static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
142     struct nfscllockowner *, u_int64_t, u_int64_t,
143     u_int32_t, struct ucred *, NFSPROC_T *, int);
144 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
145     struct acl *, nfsv4stateid_t *, void *);
146 static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
147     uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
148     struct ucred *, NFSPROC_T *);
149 static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
150     struct sockaddr_in6 *, sa_family_t, int, int, struct nfsclds **,
151     NFSPROC_T *);
152 static void nfscl_initsessionslots(struct nfsclsession *);
153 static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
154     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
155     struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
156     NFSPROC_T *);
157 static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
158     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
159     struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
160     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
161 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
162     struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
163     struct ucred *, NFSPROC_T *);
164 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
165     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
166     struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
167 static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
168     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
169     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
170 static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
171     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
172     struct ucred *, NFSPROC_T *);
173 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
174     struct nfsclds *, struct nfsclds **, uint32_t *);
175 static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
176     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
177     NFSPROC_T *);
178 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
179     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
180 #ifdef notyet
181 static int nfsio_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
182     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
183     NFSPROC_T *);
184 static int nfsrpc_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
185     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
186 #endif
187 static int nfsrpc_allocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
188     struct nfsvattr *, int *, struct ucred *, NFSPROC_T *, void *);
189 static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
190     uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
191 static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
192     NFSPROC_T *);
193 static int nfsrv_parselayoutget(struct nfsmount *, struct nfsrv_descript *,
194     nfsv4stateid_t *, int *, struct nfsclflayouthead *);
195 static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
196     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
197     struct nfscldeleg **, struct ucred *, NFSPROC_T *);
198 static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
199     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
200     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
201     struct nfsfh **, int *, int *, void *, int *);
202 static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
203     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
204     struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
205     struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
206 static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
207     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
208     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
209     struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *,
210     int, int, int, int *, struct nfsclflayouthead *, int *);
211 static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
212     uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
213     struct nfsclflayouthead *, struct ucred *, NFSPROC_T *, void *);
214 static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
215     int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
216     struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
217 static int nfsrpc_copyrpc(vnode_t, off_t, vnode_t, off_t, size_t *,
218     nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *,
219     struct nfsvattr *, int *, bool, int *, struct ucred *, NFSPROC_T *);
220 static int nfsrpc_seekrpc(vnode_t, off_t *, nfsv4stateid_t *, bool *,
221     int, struct nfsvattr *, int *, struct ucred *);
222 static struct mbuf *nfsm_split(struct mbuf *, uint64_t);
223
/* Not static, and declared here rather than via a header.
 * NOTE(review): where it is defined is not visible from here -- confirm. */
224 int nfs_pnfsio(task_fn_t *, void *);
225
226 /*
227  * nfs null call from vfs.
 *
 * Starts an NFSPROC_NULL request for vp and sends it with nfscl_request().
 * Returns the error from nfscl_request() when that is non-zero, otherwise
 * the reply status nd_repstat.  The reply mbuf chain is freed before
 * returning.
228  */
229 int
230 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
231 {
232         int error;
233         struct nfsrv_descript nfsd, *nd = &nfsd;
234
235         NFSCL_REQSTART(nd, NFSPROC_NULL, vp);
236         error = nfscl_request(nd, vp, p, cred, NULL);
            /* An error from nfscl_request() takes precedence over nd_repstat. */
237         if (nd->nd_repstat && !error)
238                 error = nd->nd_repstat;
            /* Freed on every path, whether or not an error was returned. */
239         m_freem(nd->nd_mrep);
240         return (error);
241 }
242
243 /*
244  * nfs access rpc op.
245  * For nfs version 3 and 4, use the access rpc to check accessibility. If file
246  * modes are changed on the server, accesses might still fail later.
 *
 * acmode is a mask of VREAD/VWRITE/VEXEC.  It is translated to NFSACCESS_*
 * bits (the translation differs for directories) and handed to
 * nfsrpc_accessrpc(), which also receives cred, p, nap and attrflagp
 * unchanged.
 * Returns the error from nfsrpc_accessrpc(), or EACCES when that call
 * succeeds but not every requested NFSACCESS_* bit was granted.
247  */
248 int
249 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
250     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
251 {
252         int error;
253         u_int32_t mode, rmode;
254
255         if (acmode & VREAD)
256                 mode = NFSACCESS_READ;
257         else
258                 mode = 0;
            /*
             * Directories: write needs MODIFY, EXTEND and DELETE, and exec
             * maps to LOOKUP.  Other file types: write needs MODIFY and
             * EXTEND, and exec maps to EXECUTE.
             */
259         if (vnode_vtype(vp) == VDIR) {
260                 if (acmode & VWRITE)
261                         mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
262                                  NFSACCESS_DELETE);
263                 if (acmode & VEXEC)
264                         mode |= NFSACCESS_LOOKUP;
265         } else {
266                 if (acmode & VWRITE)
267                         mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
268                 if (acmode & VEXEC)
269                         mode |= NFSACCESS_EXECUTE;
270         }
271
272         /*
273          * Now, just call nfsrpc_accessrpc() to do the actual RPC.
274          */
275         error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode,
276             NULL);
277
278         /*
279          * The NFS V3 spec does not clarify whether or not
280          * the returned access bits can be a superset of
281          * the ones requested, so...
             * ...only require that all requested bits are present in rmode.
             * rmode is read only when error is 0, because of the &&.
282          */
283         if (!error && (rmode & mode) != mode)
284                 error = EACCES;
285         return (error);
286 }
287
288 /*
289  * The actual rpc, separated out for Darwin.
 *
 * Sends an NFSPROC_ACCESS request carrying the NFSACCESS_* bits in mode; for
 * NFSv4 a Getattr op is appended to the same request.
 *
 * nap/attrflagp: handed to nfscl_postop_attr(); *attrflagp is set to 0 on
 *     entry.
 * rmodep: on a successful reply, receives the granted bits masked by the
 *     "supported" bits.  It is not written when nfscl_request() fails,
 *     when the NFSv3 post-op attribute parse fails, or when nd_repstat is
 *     non-zero.
 * stuff: passed through to nfscl_request() and nfscl_postop_attr().
 *
 * Returns 0, the error from nfscl_request()/nfscl_postop_attr(), or the
 * reply status nd_repstat.
290  */
291 int
292 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
293     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep,
294     void *stuff)
295 {
296         u_int32_t *tl;
297         u_int32_t supported, rmode;
298         int error;
299         struct nfsrv_descript nfsd, *nd = &nfsd;
300         nfsattrbit_t attrbits;
301
302         *attrflagp = 0;
            /*
             * Default: every requested bit counts as supported.  Only the
             * NFSv4 reply below replaces this value.
             */
303         supported = mode;
304         NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp);
305         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
306         *tl = txdr_unsigned(mode);
307         if (nd->nd_flag & ND_NFSV4) {
308                 /*
309                  * And do a Getattr op.
310                  */
311                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
312                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
313                 NFSGETATTR_ATTRBIT(&attrbits);
314                 (void) nfsrv_putattrbit(nd, &attrbits);
315         }
316         error = nfscl_request(nd, vp, p, cred, stuff);
            /*
             * NOTE(review): this early return does not call m_freem();
             * presumably there is no reply to free when nfscl_request()
             * fails -- confirm.
             */
317         if (error)
318                 return (error);
            /* For NFSv3 the post-op attributes are parsed before nd_repstat
             * is examined. */
319         if (nd->nd_flag & ND_NFSV3) {
320                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
321                 if (error)
322                         goto nfsmout;
323         }
324         if (!nd->nd_repstat) {
                    /*
                     * NFSv4 replies carry two words, "supported" then the
                     * access bits; otherwise only the access bits.  After
                     * this if/else, tl points at the access word.
                     * NOTE(review): NFSM_DISSECT presumably jumps to nfsmout
                     * with error set on a short reply -- confirm in the
                     * macro definition.
                     */
325                 if (nd->nd_flag & ND_NFSV4) {
326                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
327                         supported = fxdr_unsigned(u_int32_t, *tl++);
328                 } else {
329                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
330                 }
331                 rmode = fxdr_unsigned(u_int32_t, *tl);
                    /*
                     * For NFSv4 the attributes follow the access result.  A
                     * failure here is returned to the caller, but *rmodep is
                     * still stored below.
                     */
332                 if (nd->nd_flag & ND_NFSV4)
333                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
334
335                 /*
336                  * It's not obvious what should be done about
337                  * unsupported access modes. For now, be paranoid
338                  * and clear the unsupported ones.
339                  */
340                 rmode &= supported;
341                 *rmodep = rmode;
342         } else
343                 error = nd->nd_repstat;
344 nfsmout:
345         m_freem(nd->nd_mrep);
346         return (error);
347 }
348
349 /*
350  * nfs open rpc
 *
 * Returns 0 at once for anything that is not a regular file.  Otherwise
 * FREAD/FWRITE in amode are mapped to NFSV4OPEN_ACCESSREAD/ACCESSWRITE and
 * nfscl_open() is asked for the open structure; when it answers
 * NFSCLOPEN_DOOPEN an Open RPC is done, either nfsrpc_openrpc() or
 * nfsrpc_getopenlayout().
 *
 * The whole sequence is repeated while the error is NFSERR_GRACE,
 * NFSERR_STALECLIENTID, NFSERR_STALEDONTRECOVER, NFSERR_DELAY or
 * NFSERR_BADSESSION (after an nfs_catnap()), and for NFSERR_EXPIRED or
 * NFSERR_BADSTATEID while nfscl_hasexpired() returns 0 and fewer than four
 * such retries have been made.  An error remaining after four such retries
 * is returned as EIO.
351  */
352 int
353 nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
354 {
355         struct nfsclopen *op;
356         struct nfscldeleg *dp;
357         struct nfsfh *nfhp;
358         struct nfsnode *np = VTONFS(vp);
359         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
360         u_int32_t mode, clidrev;
361         int ret, newone, error, expireret = 0, retrycnt;
362
363         /*
364          * For NFSv4, Open Ops are only done on Regular Files.
365          */
366         if (vnode_vtype(vp) != VREG)
367                 return (0);
368         mode = 0;
369         if (amode & FREAD)
370                 mode |= NFSV4OPEN_ACCESSREAD;
371         if (amode & FWRITE)
372                 mode |= NFSV4OPEN_ACCESSWRITE;
373         nfhp = np->n_fhp;
374
375         retrycnt = 0;
/* Debug printing, compiled only when "notdef" is defined. */
376 #ifdef notdef
377 { char name[100]; int namel;
378 namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
379 bcopy(NFS4NODENAME(np->n_v4), name, namel);
380 name[namel] = '\0';
381 printf("rpcopen p=0x%x name=%s",p->p_pid,name);
382 if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
383 else printf(" fhl=0\n");
384 }
385 #endif
386         do {
387             dp = NULL;
388             error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
389                 cred, p, NULL, &op, &newone, &ret, 1);
390             if (error) {
391                 return (error);
392             }
                /*
                 * Remember the clientid revision seen for this attempt; it
                 * is passed to nfscl_hasexpired() below.  0 means there is
                 * no client structure, and disables the expired/bad-stateid
                 * retry.
                 */
393             if (nmp->nm_clp != NULL)
394                 clidrev = nmp->nm_clp->nfsc_clientidrev;
395             else
396                 clidrev = 0;
397             if (ret == NFSCLOPEN_DOOPEN) {
                    /* Without the n_v4 data there is nothing to build the
                     * Open from, so the result is EIO (else branch below). */
398                 if (np->n_v4 != NULL) {
399                         /*
400                          * For the first attempt, try and get a layout, if
401                          * pNFS is enabled for the mount.
                             *
                             * The plain Open RPC is used instead when any of
                             * these holds: the mount does not have pNFS,
                             * nfscl_enablecallb is 0, nfs_numnfscbd is 0,
                             * the node has NNOLAYOUT set, or this is a
                             * retry after an expired/bad-stateid error.
402                          */
403                         if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
404                             nfs_numnfscbd == 0 ||
405                             (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
406                                 error = nfsrpc_openrpc(nmp, vp,
407                                     np->n_v4->n4_data,
408                                     np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
409                                     np->n_fhp->nfh_len, mode, op,
410                                     NFS4NODENAME(np->n_v4),
411                                     np->n_v4->n4_namelen,
412                                     &dp, 0, 0x0, cred, p, 0, 0);
413                         else
414                                 error = nfsrpc_getopenlayout(nmp, vp,
415                                     np->n_v4->n4_data,
416                                     np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
417                                     np->n_fhp->nfh_len, mode, op,
418                                     NFS4NODENAME(np->n_v4),
419                                     np->n_v4->n4_namelen, &dp, cred, p);
                            /* A non-NULL dp means the Open returned a
                             * delegation. */
420                         if (dp != NULL) {
421 #ifdef APPLE
422                                 OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag);
423 #else
424                                 NFSLOCKNODE(np);
425                                 np->n_flag &= ~NDELEGMOD;
426                                 /*
427                                  * Invalidate the attribute cache, so that
428                                  * attributes that pre-date the issue of a
429                                  * delegation are not cached, since the
430                                  * cached attributes will remain valid while
431                                  * the delegation is held.
432                                  */
433                                 NFSINVALATTRCACHE(np);
434                                 NFSUNLOCKNODE(np);
435 #endif
                                    /* Hand the delegation to nfscl_deleg();
                                     * its return value is ignored. */
436                                 (void) nfscl_deleg(nmp->nm_mountp,
437                                     op->nfso_own->nfsow_clp,
438                                     nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
439                         }
440                 } else {
441                         error = EIO;
442                 }
                    /* The credentials are copied into the open whether or
                     * not the RPC succeeded. */
443                 newnfs_copyincred(cred, &op->nfso_cred);
444             } else if (ret == NFSCLOPEN_SETCRED)
445                 /*
446                  * This is a new local open on a delegation. It needs
447                  * to have credentials so that an open can be done
448                  * against the server during recovery.
449                  */
450                 newnfs_copyincred(cred, &op->nfso_cred);
451
452             /*
453              * nfso_opencnt is the count of how many VOP_OPEN()s have
454              * been done on this Open successfully and a VOP_CLOSE()
455              * is expected for each of these.
456              * If error is non-zero, don't increment it, since the Open
457              * hasn't succeeded yet.
458              */
459             if (!error)
460                 op->nfso_opencnt++;
461             nfscl_openrelease(nmp, op, error, newone);
                /*
                 * The first group of errors below is retried after an
                 * nfs_catnap() and does not count against retrycnt.  For
                 * expired/bad-stateid errors nfscl_hasexpired() is
                 * consulted instead and the attempt is counted.
                 */
462             if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
463                 error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
464                 error == NFSERR_BADSESSION) {
465                 (void) nfs_catnap(PZERO, error, "nfs_open");
466             } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
467                 && clidrev != 0) {
468                 expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
469                 retrycnt++;
470             }
471         } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
472             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
473             error == NFSERR_BADSESSION ||
474             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
475              expireret == 0 && clidrev != 0 && retrycnt < 4));
            /* Retries exhausted: report a generic I/O error. */
476         if (error && retrycnt >= 4)
477                 error = EIO;
478         return (error);
479 }
480
481 /*
482  * the actual open rpc
 *
 * Builds and sends an NFSv4 Open (NFSV4OPEN_NOCREATE) followed by a Getattr
 * for the change and modify-time attributes, then parses the reply into op.
 *
 * nfhp/fhlen: file handle the request is started with (nfscl_reqstart()).
 * newfhp/newfhlen: file handle copied into a newly allocated delegation and
 *     passed to nfsrpc_openconfirm().
 * mode: access bits in the low part, deny bits above NFSLCK_SHIFT.
 * name/namelen: name sent for the non-reclaim claim types.
 * dpp: in/out.  On entry *dpp may point to a delegation, which selects
 *     NFSV4OPEN_CLAIMDELEGATECUR; *dpp is cleared immediately.  On success
 *     it receives the delegation allocated here (M_NFSCLDELEG), or NULL if
 *     the server issued none.  On error nothing is returned through it.
 * reclaim/delegtype: non-zero reclaim sends NFSV4OPEN_CLAIMPREVIOUS with
 *     delegtype.
 * syscred: non-zero sets ND_USEGSSNAME on the request.
 * recursed: non-zero suppresses the second Open attempt made below to try
 *     to obtain a delegation.
 *
 * Returns 0 or an error (from the transport, the reply status, parsing, or
 * nfsrpc_openconfirm()).
483  */
484 int
485 nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
486     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
487     u_int8_t *name, int namelen, struct nfscldeleg **dpp,
488     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
489     int syscred, int recursed)
490 {
491         u_int32_t *tl;
492         struct nfsrv_descript nfsd, *nd = &nfsd;
493         struct nfscldeleg *dp, *ndp = NULL;
494         struct nfsvattr nfsva;
495         u_int32_t rflags, deleg;
496         nfsattrbit_t attrbits;
497         int error, ret, acesize, limitby;
498         struct nfsclsession *tsep;
499
            /* Take the caller's delegation (if any) and clear the out
             * parameter. */
500         dp = *dpp;
501         *dpp = NULL;
502         nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
            /* Five words: owner seqid, share access, share deny, and the
             * two words of the clientid. */
503         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
504         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
505         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
506         *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
507         tsep = nfsmnt_mdssession(nmp);
508         *tl++ = tsep->nfsess_clientid.lval[0];
509         *tl = tsep->nfsess_clientid.lval[1];
510         (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
511         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
512         *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
            /*
             * Claim type: CLAIMPREVIOUS for a reclaim, CLAIMDELEGATECUR
             * when the caller supplied a delegation, else CLAIMNULL.  The
             * name is only sent for the last two.
             */
513         if (reclaim) {
514                 *tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
515                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
516                 *tl = txdr_unsigned(delegtype);
517         } else {
518                 if (dp != NULL) {
519                         *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
520                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
                            /* When NFSHASNFSV4N(nmp) is true the stateid
                             * seqid is sent as 0. */
521                         if (NFSHASNFSV4N(nmp))
522                                 *tl++ = 0;
523                         else
524                                 *tl++ = dp->nfsdl_stateid.seqid;
525                         *tl++ = dp->nfsdl_stateid.other[0];
526                         *tl++ = dp->nfsdl_stateid.other[1];
527                         *tl = dp->nfsdl_stateid.other[2];
528                 } else {
529                         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
530                 }
531                 (void) nfsm_strtom(nd, name, namelen);
532         }
533         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
534         *tl = txdr_unsigned(NFSV4OP_GETATTR);
535         NFSZERO_ATTRBIT(&attrbits);
536         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
537         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
538         (void) nfsrv_putattrbit(nd, &attrbits);
539         if (syscred)
540                 nd->nd_flag |= ND_USEGSSNAME;
541         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
542             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
543         if (error)
544                 return (error);
545         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
546         if (!nd->nd_repstat) {
                    /*
                     * NOTE(review): NFSM_DISSECT presumably jumps to nfsmout
                     * with error set on a short reply -- confirm in the
                     * macro definition.
                     */
547                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
548                     6 * NFSX_UNSIGNED);
549                 op->nfso_stateid.seqid = *tl++;
550                 op->nfso_stateid.other[0] = *tl++;
551                 op->nfso_stateid.other[1] = *tl++;
552                 op->nfso_stateid.other[2] = *tl;
                    /*
                     * tl still points at the last stateid word, so tl + 6 is
                     * the last of the six words dissected after the stateid
                     * (assuming NFSX_STATEID covers exactly the four words
                     * just read).  The five words in between are not used
                     * here.
                     */
553                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
554                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
555                 if (error)
556                         goto nfsmout;
557                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
558                 deleg = fxdr_unsigned(u_int32_t, *tl);
559                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
560                     deleg == NFSV4OPEN_DELEGATEWRITE) {
                            /* Record on the client that a delegation has
                             * been received; both flags are set together,
                             * once. */
561                         if (!(op->nfso_own->nfsow_clp->nfsc_flags &
562                               NFSCLFLAGS_FIRSTDELEG))
563                                 op->nfso_own->nfsow_clp->nfsc_flags |=
564                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
                            /* The file handle is stored in the same
                             * allocation, after the structure. */
565                         ndp = malloc(
566                             sizeof (struct nfscldeleg) + newfhlen,
567                             M_NFSCLDELEG, M_WAITOK);
568                         LIST_INIT(&ndp->nfsdl_owner);
569                         LIST_INIT(&ndp->nfsdl_lock);
570                         ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
571                         ndp->nfsdl_fhlen = newfhlen;
572                         NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
573                         newnfs_copyincred(cred, &ndp->nfsdl_cred);
574                         nfscl_lockinit(&ndp->nfsdl_rwlock);
575                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
576                             NFSX_UNSIGNED);
577                         ndp->nfsdl_stateid.seqid = *tl++;
578                         ndp->nfsdl_stateid.other[0] = *tl++;
579                         ndp->nfsdl_stateid.other[1] = *tl++;
580                         ndp->nfsdl_stateid.other[2] = *tl++;
                            /* The word after the delegation stateid; when
                             * non-zero, NFSCLDL_RECALL is set below. */
581                         ret = fxdr_unsigned(int, *tl);
582                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
583                                 ndp->nfsdl_flags = NFSCLDL_WRITE;
584                                 /*
585                                  * Indicates how much the file can grow.
                                     * LIMITSIZE gives one 64-bit value;
                                     * LIMITBLOCKS gives two 32-bit values
                                     * that are multiplied together.  Any
                                     * other discriminant is a bad reply.
586                                  */
587                                 NFSM_DISSECT(tl, u_int32_t *,
588                                     3 * NFSX_UNSIGNED);
589                                 limitby = fxdr_unsigned(int, *tl++);
590                                 switch (limitby) {
591                                 case NFSV4OPEN_LIMITSIZE:
592                                         ndp->nfsdl_sizelimit = fxdr_hyper(tl);
593                                         break;
594                                 case NFSV4OPEN_LIMITBLOCKS:
595                                         ndp->nfsdl_sizelimit =
596                                             fxdr_unsigned(u_int64_t, *tl++);
597                                         ndp->nfsdl_sizelimit *=
598                                             fxdr_unsigned(u_int64_t, *tl);
599                                         break;
600                                 default:
601                                         error = NFSERR_BADXDR;
602                                         goto nfsmout;
603                                 }
604                         } else {
605                                 ndp->nfsdl_flags = NFSCLDL_READ;
606                         }
607                         if (ret)
608                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
                            /* ret and acesize are only outputs of this
                             * call; their values are not used afterwards. */
609                         error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
610                             &acesize, p);
611                         if (error)
612                                 goto nfsmout;
613                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
614                         error = NFSERR_BADXDR;
615                         goto nfsmout;
616                 }
                    /* Step over two words that precede the attributes; their
                     * values are not examined here. */
617                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
618                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
619                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
620                     NULL, NULL, NULL, p, cred);
621                 if (error)
622                         goto nfsmout;
                    /* Record the change attribute and modify time that came
                     * back with the Open in the new delegation. */
623                 if (ndp != NULL) {
624                         ndp->nfsdl_change = nfsva.na_filerev;
625                         ndp->nfsdl_modtime = nfsva.na_mtime;
626                         ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
627                 }
                    /* The server asked for an OpenConfirm: do it, repeating
                     * for as long as it answers NFSERR_DELAY. */
628                 if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
629                     do {
630                         ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
631                             cred, p);
632                         if (ret == NFSERR_DELAY)
633                             (void) nfs_catnap(PZERO, ret, "nfs_open");
634                     } while (ret == NFSERR_DELAY);
635                     error = ret;
636                 }
637                 if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
638                     nfscl_assumeposixlocks)
639                     op->nfso_posixlock = 1;
640                 else
641                     op->nfso_posixlock = 0;
642
643                 /*
644                  * If the server is handing out delegations, but we didn't
645                  * get one because an OpenConfirm was required, try the
646                  * Open again, to get a delegation. This is a harmless no-op,
647                  * from a server's point of view.
                     *
                     * The nested call passes recursed = 1, so it cannot
                     * recurse again.  A failure of the second Open is
                     * ignored unless it is NFSERR_STALECLIENTID,
                     * NFSERR_STALEDONTRECOVER or NFSERR_BADSESSION.
648                  */
649                 if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
650                     (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
651                     && !error && dp == NULL && ndp == NULL && !recursed) {
652                     do {
653                         ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
654                             newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
655                             cred, p, syscred, 1);
656                         if (ret == NFSERR_DELAY)
657                             (void) nfs_catnap(PZERO, ret, "nfs_open2");
658                     } while (ret == NFSERR_DELAY);
659                     if (ret) {
660                         if (ndp != NULL) {
661                                 free(ndp, M_NFSCLDELEG);
662                                 ndp = NULL;
663                         }
664                         if (ret == NFSERR_STALECLIENTID ||
665                             ret == NFSERR_STALEDONTRECOVER ||
666                             ret == NFSERR_BADSESSION)
667                                 error = ret;
668                     }
669                 }
670         }
671         if (nd->nd_repstat != 0 && error == 0)
672                 error = nd->nd_repstat;
            /* Not reached by the "goto nfsmout" paths above, which jump
             * past this check. */
673         if (error == NFSERR_STALECLIENTID)
674                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
675 nfsmout:
            /* On success the new delegation (possibly NULL) goes to the
             * caller; on any error it is freed here. */
676         if (!error)
677                 *dpp = ndp;
678         else if (ndp != NULL)
679                 free(ndp, M_NFSCLDELEG);
680         m_freem(nd->nd_mrep);
681         return (error);
682 }
683
684 /*
685  * open downgrade rpc
686  */
687 int
688 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
689     struct ucred *cred, NFSPROC_T *p)
690 {
691         u_int32_t *tl;
692         struct nfsrv_descript nfsd, *nd = &nfsd;
693         int error;
694
695         NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp);
696         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
697         if (NFSHASNFSV4N(VFSTONFS(vp->v_mount)))
698                 *tl++ = 0;
699         else
700                 *tl++ = op->nfso_stateid.seqid;
701         *tl++ = op->nfso_stateid.other[0];
702         *tl++ = op->nfso_stateid.other[1];
703         *tl++ = op->nfso_stateid.other[2];
704         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
705         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
706         *tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
707         error = nfscl_request(nd, vp, p, cred, NULL);
708         if (error)
709                 return (error);
710         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
711         if (!nd->nd_repstat) {
712                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
713                 op->nfso_stateid.seqid = *tl++;
714                 op->nfso_stateid.other[0] = *tl++;
715                 op->nfso_stateid.other[1] = *tl++;
716                 op->nfso_stateid.other[2] = *tl;
717         }
718         if (nd->nd_repstat && error == 0)
719                 error = nd->nd_repstat;
720         if (error == NFSERR_STALESTATEID)
721                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
722 nfsmout:
723         m_freem(nd->nd_mrep);
724         return (error);
725 }
726
727 /*
728  * V4 Close operation.
729  */
730 int
731 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
732 {
733         struct nfsclclient *clp;
734         int error;
735
736         if (vnode_vtype(vp) != VREG)
737                 return (0);
738         if (doclose)
739                 error = nfscl_doclose(vp, &clp, p);
740         else
741                 error = nfscl_getclose(vp, &clp);
742         if (error)
743                 return (error);
744
745         nfscl_clientrelease(clp);
746         return (0);
747 }
748
749 /*
750  * Close the open.
     *
     * Walks every lock owner recorded under the open, unlocking and freeing
     * each lock (the unlock is retried a bounded number of times while the
     * server answers NFSERR_GRACE or NFSERR_DELAY), and issues a
     * ReleaseLockOwner for each lock owner.  The close itself is then done
     * via nfscl_tryclose() with the open owner exclusively locked, after
     * which the lock owners and the open are freed.
     *
     * nmp - NFS mount the open belongs to
     * op  - the open to close; it is freed by nfscl_freeopen() before return
     * p   - handed through to the RPC helpers
751  */
752 void
753 nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p)
754 {
755         struct nfsrv_descript nfsd, *nd = &nfsd;
756         struct nfscllockowner *lp, *nlp;
757         struct nfscllock *lop, *nlop;
758         struct ucred *tcred;
759         u_int64_t off = 0, len = 0;
760         u_int32_t type = NFSV4LOCKT_READ;
761         int error, do_unlock, trycnt;
762
            /*
             * Use a scratch credential set up from the one saved in the
             * open; it is released with NFSFREECRED() at the end.
             */
763         tcred = newnfs_getcred();
764         newnfs_copycred(&op->nfso_cred, tcred);
765         /*
766          * (Theoretically this could be done in the same
767          *  compound as the close, but having multiple
768          *  sequenced Ops in the same compound might be
769          *  too scary for some servers.)
770          */
            /*
             * With POSIX-style locking, one unlock starting at offset 0 with
             * length NFS64BITSSET is sent per lock owner instead of one
             * unlock per recorded lock range.
             */
771         if (op->nfso_posixlock) {
772                 off = 0;
773                 len = NFS64BITSSET;
774                 type = NFSV4LOCKT_READ;
775         }
776
777         /*
778          * Since this function is only called from VOP_INACTIVE(), no
779          * other thread will be manipulating this Open. As such, the
780          * lock lists are not being changed by other threads, so it should
781          * be safe to do this without locking.
782          */
783         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
                    /* Reset per lock owner; cleared below for POSIX locks. */
784                 do_unlock = 1;
785                 LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
                            /* Non-POSIX: unlock exactly this lock's range. */
786                         if (op->nfso_posixlock == 0) {
787                                 off = lop->nfslo_first;
788                                 len = lop->nfslo_end - lop->nfslo_first;
789                                 if (lop->nfslo_type == F_WRLCK)
790                                         type = NFSV4LOCKT_WRITE;
791                                 else
792                                         type = NFSV4LOCKT_READ;
793                         }
794                         if (do_unlock) {
795                                 trycnt = 0;
                                    /*
                                     * Retry while the RPC itself succeeded
                                     * but the server replied GRACE or DELAY,
                                     * giving up after a few attempts.
                                     */
796                                 do {
797                                         error = nfsrpc_locku(nd, nmp, lp, off,
798                                             len, type, tcred, p, 0);
799                                         if ((nd->nd_repstat == NFSERR_GRACE ||
800                                             nd->nd_repstat == NFSERR_DELAY) &&
801                                             error == 0)
802                                                 (void) nfs_catnap(PZERO,
803                                                     (int)nd->nd_repstat,
804                                                     "nfs_close");
805                                 } while ((nd->nd_repstat == NFSERR_GRACE ||
806                                     nd->nd_repstat == NFSERR_DELAY) &&
807                                     error == 0 && trycnt++ < 5);
                                    /* One whole-file unlock is enough. */
808                                 if (op->nfso_posixlock)
809                                         do_unlock = 0;
810                         }
                            /* The local lock record is freed either way. */
811                         nfscl_freelock(lop, 0);
812                 }
813                 /*
814                  * Do a ReleaseLockOwner.
815                  * The lock owner name nfsl_owner may be used by other opens for
816                  * other files but the lock_owner4 name that nfsrpc_rellockown()
817                  * puts on the wire has the file handle for this file appended
818                  * to it, so it can be done now.
819                  */
820                 (void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
821                     lp->nfsl_open->nfso_fhlen, tcred, p);
822         }
823
824         /*
825          * There could be other Opens for different files on the same
826          * OpenOwner, so locking is required.
827          */
828         NFSLOCKCLSTATE();
829         nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
830         NFSUNLOCKCLSTATE();
            /* Keep trying the close for as long as the reply is GRACE. */
831         do {
832                 error = nfscl_tryclose(op, tcred, nmp, p);
833                 if (error == NFSERR_GRACE)
834                         (void) nfs_catnap(PZERO, error, "nfs_close");
835         } while (error == NFSERR_GRACE);
836         NFSLOCKCLSTATE();
837         nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
838
            /*
             * Free the lock owners and the open itself while the client
             * state lock is held.
             */
839         LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
840                 nfscl_freelockowner(lp, 0);
841         nfscl_freeopen(op, 0);
842         NFSUNLOCKCLSTATE();
843         NFSFREECRED(tcred);
844 }
845
846 /*
847  * The actual Close RPC.
848  */
849 int
850 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
851     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
852     int syscred)
853 {
854         u_int32_t *tl;
855         int error;
856
857         nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
858             op->nfso_fhlen, NULL, NULL, 0, 0);
859         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
860         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
861         if (NFSHASNFSV4N(nmp))
862                 *tl++ = 0;
863         else
864                 *tl++ = op->nfso_stateid.seqid;
865         *tl++ = op->nfso_stateid.other[0];
866         *tl++ = op->nfso_stateid.other[1];
867         *tl = op->nfso_stateid.other[2];
868         if (syscred)
869                 nd->nd_flag |= ND_USEGSSNAME;
870         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
871             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
872         if (error)
873                 return (error);
874         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
875         if (nd->nd_repstat == 0)
876                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
877         error = nd->nd_repstat;
878         if (error == NFSERR_STALESTATEID)
879                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
880 nfsmout:
881         m_freem(nd->nd_mrep);
882         return (error);
883 }
884
885 /*
886  * V4 Open Confirm RPC.
887  */
888 int
889 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
890     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
891 {
892         u_int32_t *tl;
893         struct nfsrv_descript nfsd, *nd = &nfsd;
894         struct nfsmount *nmp;
895         int error;
896
897         nmp = VFSTONFS(vp->v_mount);
898         if (NFSHASNFSV4N(nmp))
899                 return (0);             /* No confirmation for NFSv4.1. */
900         nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL,
901             0, 0);
902         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
903         *tl++ = op->nfso_stateid.seqid;
904         *tl++ = op->nfso_stateid.other[0];
905         *tl++ = op->nfso_stateid.other[1];
906         *tl++ = op->nfso_stateid.other[2];
907         *tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
908         error = nfscl_request(nd, vp, p, cred, NULL);
909         if (error)
910                 return (error);
911         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
912         if (!nd->nd_repstat) {
913                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
914                 op->nfso_stateid.seqid = *tl++;
915                 op->nfso_stateid.other[0] = *tl++;
916                 op->nfso_stateid.other[1] = *tl++;
917                 op->nfso_stateid.other[2] = *tl;
918         }
919         error = nd->nd_repstat;
920         if (error == NFSERR_STALESTATEID)
921                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
922 nfsmout:
923         m_freem(nd->nd_mrep);
924         return (error);
925 }
926
927 /*
928  * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
929  * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
     *
     * When NFSHASNFSV4N() is true for the mount, sessions are used instead:
     * if retokp is non-NULL and a session is already on the mount's list, a
     * CreateSession with the existing ClientID is tried first; otherwise, or
     * when that fails with *retokp false, an ExchangeID followed by a
     * CreateSession is done.  The new session is put at the head of the
     * mount's session list and, when reclaim is 0, a ReclaimComplete is done.
     *
     * Otherwise SetClientID, SetClientIDConfirm and a Getattr of the lease
     * time are done, and the client's renew/expire times and clientid
     * revision are updated.
     *
     * retokp - optional; set to true when the CreateSession with the extant
     *          ClientID works.  If *retokp is already true on entry and that
     *          attempt is not possible or fails, NFSERR_IO is returned.
     * Returns 0 on success, otherwise an RPC error or server reply status.
930  */
931 int
932 nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
933     bool *retokp, struct ucred *cred, NFSPROC_T *p)
934 {
935         u_int32_t *tl;
936         struct nfsrv_descript nfsd;
937         struct nfsrv_descript *nd = &nfsd;
938         nfsattrbit_t attrbits;
939         u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
940         u_short port;
941         int error, isinet6 = 0, callblen;
942         nfsquad_t confirm;
943         u_int32_t lease;
            /* Counter shared by all calls; copied into clp->nfsc_rev below. */
944         static u_int32_t rev = 0;
945         struct nfsclds *dsp, *odsp;
946         struct in6_addr a6;
947         struct nfsclsession *tsep;
948
949         if (nfsboottime.tv_sec == 0)
950                 NFSSETBOOTTIME(nfsboottime);
951         if (NFSHASNFSV4N(nmp)) {
                    /*
                     * Start out as "failed" so that the ExchangeID path below
                     * is taken unless the CreateSession attempt succeeds.
                     */
952                 error = NFSERR_BADSESSION;
953                 odsp = dsp = NULL;
954                 if (retokp != NULL) {
955                         NFSLOCKMNT(nmp);
956                         odsp = TAILQ_FIRST(&nmp->nm_sess);
957                         NFSUNLOCKMNT(nmp);
958                 }
959                 if (odsp != NULL) {
960                         /*
961                          * When a session already exists, first try a
962                          * CreateSession with the extant ClientID.
963                          */
964                         dsp = malloc(sizeof(struct nfsclds) +
965                             odsp->nfsclds_servownlen + 1, M_NFSCLDS,
966                             M_WAITOK | M_ZERO);
967                         dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
968                         dsp->nfsclds_servownlen = odsp->nfsclds_servownlen;
969                         dsp->nfsclds_sess.nfsess_clientid =
970                             odsp->nfsclds_sess.nfsess_clientid;
971                         dsp->nfsclds_sess.nfsess_sequenceid =
972                             odsp->nfsclds_sess.nfsess_sequenceid;
973                         dsp->nfsclds_flags = odsp->nfsclds_flags;
974                         if (dsp->nfsclds_servownlen > 0)
975                                 memcpy(dsp->nfsclds_serverown,
976                                     odsp->nfsclds_serverown,
977                                     dsp->nfsclds_servownlen + 1);
978                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
979                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
980                             NULL, MTX_DEF);
981                         nfscl_initsessionslots(&dsp->nfsclds_sess);
982                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
983                             &nmp->nm_sockreq, NULL,
984                             dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
985                         NFSCL_DEBUG(1, "create session for extant "
986                             "ClientID=%d\n", error);
987                         if (error != 0) {
988                                 nfscl_freenfsclds(dsp);
989                                 dsp = NULL;
990                                 /*
991                                  * If *retokp is true, return any error other
992                                  * than NFSERR_STALECLIENTID,
993                                  * NFSERR_BADSESSION or NFSERR_STALEDONTRECOVER
994                                  * so that nfscl_recover() will not loop.
995                                  */
996                                 if (*retokp)
997                                         return (NFSERR_IO);
998                         } else
999                                 *retokp = true;
1000                 } else if (retokp != NULL && *retokp)
1001                         return (NFSERR_IO);
1002                 if (error != 0) {
1003                         /*
1004                          * Either there was no previous session or the
1005                          * CreateSession attempt failed, so...
1006                          * do an ExchangeID followed by the CreateSession.
1007                          */
1008                         clp->nfsc_rev = rev++;
1009                         error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, 0,
1010                             NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp,
1011                             cred, p);
1012                         NFSCL_DEBUG(1, "aft exch=%d\n", error);
1013                         if (error == 0)
1014                                 error = nfsrpc_createsession(nmp,
1015                                     &dsp->nfsclds_sess, &nmp->nm_sockreq, NULL,
1016                                     dsp->nfsclds_sess.nfsess_sequenceid, 1,
1017                                     cred, p);
1018                         NFSCL_DEBUG(1, "aft createsess=%d\n", error);
1019                 }
1020                 if (error == 0) {
1021                         NFSLOCKMNT(nmp);
1022                         /*
1023                          * The old sessions cannot be safely free'd
1024                          * here, since they may still be used by
1025                          * in-progress RPCs.
1026                          */
1027                         tsep = NULL;
1028                         if (TAILQ_FIRST(&nmp->nm_sess) != NULL)
1029                                 tsep = NFSMNT_MDSSESSION(nmp);
1030                         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
1031                             nfsclds_list);
1032                         /*
1033                          * Wake up RPCs waiting for a slot on the
1034                          * old session. These will then fail with
1035                          * NFSERR_BADSESSION and be retried with the
1036                          * new session by nfsv4_setsequence().
1037                          * Also wakeup() processes waiting for the
1038                          * new session.
1039                          */
1040                         if (tsep != NULL)
1041                                 wakeup(&tsep->nfsess_slots);
1042                         wakeup(&nmp->nm_sess);
1043                         NFSUNLOCKMNT(nmp);
1044                 } else if (dsp != NULL)
1045                         nfscl_freenfsclds(dsp);
1046                 if (error == 0 && reclaim == 0) {
1047                         error = nfsrpc_reclaimcomplete(nmp, cred, p);
1048                         NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
1049                         if (error == NFSERR_COMPLETEALREADY ||
1050                             error == NFSERR_NOTSUPP)
1051                                 /* Ignore this error. */
1052                                 error = 0;
1053                 }
1054                 return (error);
1055         } else if (retokp != NULL && *retokp)
1056                 return (NFSERR_IO);
1057         clp->nfsc_rev = rev++;
1058
1059         /*
1060          * Allocate a single session structure for NFSv4.0, because some of
1061          * the fields are used by NFSv4.0 although it doesn't do a session.
1062          */
1063         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
1064         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
1065         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
1066         NFSLOCKMNT(nmp);
1067         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
1068         tsep = NFSMNT_MDSSESSION(nmp);
1069         NFSUNLOCKMNT(nmp);
1070
             /*
              * The SetClientID arguments start with the boot time and the
              * revision counter, followed by the client id string.
              */
1071         nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0);
1072         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1073         *tl++ = txdr_unsigned(nfsboottime.tv_sec);
1074         *tl = txdr_unsigned(clp->nfsc_rev);
1075         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
1076
1077         /*
1078          * set up the callback address
              *
              * A configured nfsv4_callbackaddr string is used when it is not
              * empty; otherwise the address comes from nfscl_getmyip().  When
              * callbacks are not enabled, no callback daemons are running or
              * no address is available, "0.0.0.0.0.0" is sent instead.
1079          */
1080         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1081         *tl = txdr_unsigned(NFS_CALLBCKPROG);
1082         callblen = strlen(nfsv4_callbackaddr);
1083         if (callblen == 0)
1084                 cp = nfscl_getmyip(nmp, &a6, &isinet6);
1085         if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
1086             (callblen > 0 || cp != NULL)) {
                     /*
                      * cp2 addresses the two bytes of the port in network
                      * order; they are appended to the address as ".hi.lo".
                      */
1087                 port = htons(nfsv4_cbport);
1088                 cp2 = (u_int8_t *)&port;
1089 #ifdef INET6
1090                 if ((callblen > 0 &&
1091                      strchr(nfsv4_callbackaddr, ':')) || isinet6) {
1092                         char ip6buf[INET6_ADDRSTRLEN], *ip6add;
1093
1094                         (void) nfsm_strtom(nd, "tcp6", 4);
1095                         if (callblen == 0) {
1096                                 ip6_sprintf(ip6buf, (struct in6_addr *)cp);
1097                                 ip6add = ip6buf;
1098                         } else {
1099                                 ip6add = nfsv4_callbackaddr;
1100                         }
1101                         snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
1102                             ip6add, cp2[0], cp2[1]);
1103                 } else
1104 #endif
1105                 {
1106                         (void) nfsm_strtom(nd, "tcp", 3);
1107                         if (callblen == 0)
1108                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1109                                     "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
1110                                     cp[2], cp[3], cp2[0], cp2[1]);
1111                         else
1112                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1113                                     "%s.%d.%d", nfsv4_callbackaddr,
1114                                     cp2[0], cp2[1]);
1115                 }
1116                 (void) nfsm_strtom(nd, addr, strlen(addr));
1117         } else {
1118                 (void) nfsm_strtom(nd, "tcp", 3);
1119                 (void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
1120         }
1121         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1122         *tl = txdr_unsigned(clp->nfsc_cbident);
1123         nd->nd_flag |= ND_USEGSSNAME;
1124         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1125                 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1126         if (error)
1127                 return (error);
1128         if (nd->nd_repstat == 0) {
                 /*
                  * The reply holds the clientid, which is stored in the
                  * session structure, and the verifier needed for the confirm.
                  */
1129             NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1130             tsep->nfsess_clientid.lval[0] = *tl++;
1131             tsep->nfsess_clientid.lval[1] = *tl++;
1132             confirm.lval[0] = *tl++;
1133             confirm.lval[1] = *tl;
1134             m_freem(nd->nd_mrep);
1135             nd->nd_mrep = NULL;
1136
1137             /*
1138              * and confirm it.
1139              */
1140             nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
1141                 NULL, 0, 0);
1142             NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1143             *tl++ = tsep->nfsess_clientid.lval[0];
1144             *tl++ = tsep->nfsess_clientid.lval[1];
1145             *tl++ = confirm.lval[0];
1146             *tl = confirm.lval[1];
1147             nd->nd_flag |= ND_USEGSSNAME;
1148             error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1149                 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1150             if (error)
1151                 return (error);
1152             m_freem(nd->nd_mrep);
1153             nd->nd_mrep = NULL;
1154             if (nd->nd_repstat == 0) {
                     /*
                      * Get the lease time for the mount's file handle; the
                      * renew interval and expiry time are derived from it.
                      */
1155                 nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh,
1156                     nmp->nm_fhsize, NULL, NULL, 0, 0);
1157                 NFSZERO_ATTRBIT(&attrbits);
1158                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1159                 (void) nfsrv_putattrbit(nd, &attrbits);
1160                 nd->nd_flag |= ND_USEGSSNAME;
1161                 error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1162                     cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1163                 if (error)
1164                     return (error);
1165                 if (nd->nd_repstat == 0) {
1166                     error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL,
1167                         NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred);
1168                     if (error)
1169                         goto nfsmout;
1170                     clp->nfsc_renew = NFSCL_RENEW(lease);
1171                     clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
                         /* Bump the clientid revision, never leaving it at 0. */
1172                     clp->nfsc_clientidrev++;
1173                     if (clp->nfsc_clientidrev == 0)
1174                         clp->nfsc_clientidrev++;
1175                 }
1176             }
1177         }
             /* The status of the last RPC done is what gets returned. */
1178         error = nd->nd_repstat;
1179 nfsmout:
1180         m_freem(nd->nd_mrep);
1181         return (error);
1182 }
1183
1184 /*
1185  * nfs getattr call.
1186  */
1187 int
1188 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1189     struct nfsvattr *nap, void *stuff)
1190 {
1191         struct nfsrv_descript nfsd, *nd = &nfsd;
1192         int error;
1193         nfsattrbit_t attrbits;
1194
1195         NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
1196         if (nd->nd_flag & ND_NFSV4) {
1197                 NFSGETATTR_ATTRBIT(&attrbits);
1198                 (void) nfsrv_putattrbit(nd, &attrbits);
1199         }
1200         error = nfscl_request(nd, vp, p, cred, stuff);
1201         if (error)
1202                 return (error);
1203         if (!nd->nd_repstat)
1204                 error = nfsm_loadattr(nd, nap);
1205         else
1206                 error = nd->nd_repstat;
1207         m_freem(nd->nd_mrep);
1208         return (error);
1209 }
1210
1211 /*
1212  * nfs getattr call with non-vnode arguemnts.
1213  */
1214 int
1215 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1216     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1217     uint32_t *leasep)
1218 {
1219         struct nfsrv_descript nfsd, *nd = &nfsd;
1220         int error, vers = NFS_VER2;
1221         nfsattrbit_t attrbits;
1222
1223         nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0);
1224         if (nd->nd_flag & ND_NFSV4) {
1225                 vers = NFS_VER4;
1226                 NFSGETATTR_ATTRBIT(&attrbits);
1227                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1228                 (void) nfsrv_putattrbit(nd, &attrbits);
1229         } else if (nd->nd_flag & ND_NFSV3) {
1230                 vers = NFS_VER3;
1231         }
1232         if (syscred)
1233                 nd->nd_flag |= ND_USEGSSNAME;
1234         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1235             NFS_PROG, vers, NULL, 1, xidp, NULL);
1236         if (error)
1237                 return (error);
1238         if (nd->nd_repstat == 0) {
1239                 if ((nd->nd_flag & ND_NFSV4) != 0)
1240                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1241                             NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1242                             NULL, NULL);
1243                 else
1244                         error = nfsm_loadattr(nd, nap);
1245         } else
1246                 error = nd->nd_repstat;
1247         m_freem(nd->nd_mrep);
1248         return (error);
1249 }
1250
1251 /*
1252  * Do an nfs setattr operation.
      *
      * When vap is non-NULL the attributes in it are set via
      * nfsrpc_setattrrpc(); otherwise the ACL in aclp is set via
      * nfsrpc_setaclrpc().  For NFSv4 mounts a stateid is looked up first
      * and, for regular files, an Open may be done temporarily to get one.
      * The RPC is retried for a set of recoverable errors (see the loop
      * condition).  If an error remains once retrycnt has reached 4, it is
      * replaced by EIO.
      *
      * rnap, attrflagp - passed to nfsrpc_setattrrpc() for the returned
      *                   attributes
      * stuff           - passed through to the RPC helpers
1253  */
1254 int
1255 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1256     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
1257     void *stuff)
1258 {
1259         int error, expireret = 0, openerr, retrycnt;
1260         u_int32_t clidrev = 0, mode;
1261         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1262         struct nfsfh *nfhp;
1263         nfsv4stateid_t stateid;
1264         void *lckp;
1265
             /*
              * Remember the clientid revision seen before the RPC; it is
              * handed to nfscl_hasexpired() below.  It stays 0 when the mount
              * has no client structure.
              */
1266         if (nmp->nm_clp != NULL)
1267                 clidrev = nmp->nm_clp->nfsc_clientidrev;
             /* Setting the size uses write access; anything else uses read. */
1268         if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1269                 mode = NFSV4OPEN_ACCESSWRITE;
1270         else
1271                 mode = NFSV4OPEN_ACCESSREAD;
1272         retrycnt = 0;
1273         do {
1274                 lckp = NULL;
                     /* Non-zero means no temporary Open needs closing. */
1275                 openerr = 1;
1276                 if (NFSHASNFSV4(nmp)) {
1277                         nfhp = VTONFS(vp)->n_fhp;
1278                         error = nfscl_getstateid(vp, nfhp->nfh_fh,
1279                             nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1280                         if (error && vnode_vtype(vp) == VREG &&
1281                             (mode == NFSV4OPEN_ACCESSWRITE ||
1282                              nfstest_openallsetattr)) {
1283                                 /*
1284                                  * No Open stateid, so try and open the file
1285                                  * now.
1286                                  */
1287                                 if (mode == NFSV4OPEN_ACCESSWRITE)
1288                                         openerr = nfsrpc_open(vp, FWRITE, cred,
1289                                             p);
1290                                 else
1291                                         openerr = nfsrpc_open(vp, FREAD, cred,
1292                                             p);
1293                                 if (!openerr)
1294                                         (void) nfscl_getstateid(vp,
1295                                             nfhp->nfh_fh, nfhp->nfh_len,
1296                                             mode, 0, cred, p, &stateid, &lckp);
1297                         }
1298                 }
1299                 if (vap != NULL)
1300                         error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1301                             rnap, attrflagp, stuff);
1302                 else
1303                         error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
1304                             stuff);
                     /*
                      * Note on the mount that a read-access stateid was
                      * rejected with NFSERR_OPENMODE.
                      */
1305                 if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1306                         NFSLOCKMNT(nmp);
1307                         nmp->nm_state |= NFSSTA_OPENMODE;
1308                         NFSUNLOCKMNT(nmp);
1309                 }
1310                 if (error == NFSERR_STALESTATEID)
1311                         nfscl_initiate_recovery(nmp->nm_clp);
                     /* Drop what was acquired for this attempt. */
1312                 if (lckp != NULL)
1313                         nfscl_lockderef(lckp);
1314                 if (!openerr)
1315                         (void) nfsrpc_close(vp, 0, p);
                     /*
                      * Sleep a little before retrying the errors handled by
                      * nfs_catnap(); for an expired or bad stateid, ask
                      * nfscl_hasexpired() and keep its result for the loop
                      * condition.
                      */
1316                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1317                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1318                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1319                         (void) nfs_catnap(PZERO, error, "nfs_setattr");
1320                 } else if ((error == NFSERR_EXPIRED ||
1321                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1322                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1323                 }
1324                 retrycnt++;
             /*
              * GRACE, STALESTATEID, STALEDONTRECOVER, DELAY and BADSESSION are
              * retried without a limit; OLDSTATEID up to 20 tries; EXPIRED,
              * BADSTATEID and a read-mode OPENMODE up to 4 tries.
              */
1325         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1326             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1327             error == NFSERR_BADSESSION ||
1328             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1329             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1330              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1331             (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1332              retrycnt < 4));
             /* Any error left after 4 or more tries is reported as EIO. */
1333         if (error && retrycnt >= 4)
1334                 error = EIO;
1335         return (error);
1336 }
1337
1338 static int
1339 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1340     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1341     struct nfsvattr *rnap, int *attrflagp, void *stuff)
1342 {
1343         u_int32_t *tl;
1344         struct nfsrv_descript nfsd, *nd = &nfsd;
1345         int error;
1346         nfsattrbit_t attrbits;
1347
1348         *attrflagp = 0;
1349         NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp);
1350         if (nd->nd_flag & ND_NFSV4)
1351                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1352         vap->va_type = vnode_vtype(vp);
1353         nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1354         if (nd->nd_flag & ND_NFSV3) {
1355                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1356                 *tl = newnfs_false;
1357         } else if (nd->nd_flag & ND_NFSV4) {
1358                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1359                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1360                 NFSGETATTR_ATTRBIT(&attrbits);
1361                 (void) nfsrv_putattrbit(nd, &attrbits);
1362         }
1363         error = nfscl_request(nd, vp, p, cred, stuff);
1364         if (error)
1365                 return (error);
1366         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1367                 error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff);
1368         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1369                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1370         if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1371                 error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
1372         m_freem(nd->nd_mrep);
1373         if (nd->nd_repstat && !error)
1374                 error = nd->nd_repstat;
1375         return (error);
1376 }
1377
1378 /*
1379  * nfs lookup rpc
1380  */
1381 int
1382 nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
1383     NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
1384     struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff)
1385 {
1386         u_int32_t *tl;
1387         struct nfsrv_descript nfsd, *nd = &nfsd;
1388         struct nfsmount *nmp;
1389         struct nfsnode *np;
1390         struct nfsfh *nfhp;
1391         nfsattrbit_t attrbits;
1392         int error = 0, lookupp = 0;
1393
1394         *attrflagp = 0;
1395         *dattrflagp = 0;
1396         if (vnode_vtype(dvp) != VDIR)
1397                 return (ENOTDIR);
1398         nmp = VFSTONFS(dvp->v_mount);
1399         if (len > NFS_MAXNAMLEN)
1400                 return (ENAMETOOLONG);
1401         if (NFSHASNFSV4(nmp) && len == 1 &&
1402                 name[0] == '.') {
1403                 /*
1404                  * Just return the current dir's fh.
1405                  */
1406                 np = VTONFS(dvp);
1407                 nfhp = malloc(sizeof (struct nfsfh) +
1408                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1409                 nfhp->nfh_len = np->n_fhp->nfh_len;
1410                 NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1411                 *nfhpp = nfhp;
1412                 return (0);
1413         }
1414         if (NFSHASNFSV4(nmp) && len == 2 &&
1415                 name[0] == '.' && name[1] == '.') {
1416                 lookupp = 1;
1417                 NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp);
1418         } else {
1419                 NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp);
1420                 (void) nfsm_strtom(nd, name, len);
1421         }
1422         if (nd->nd_flag & ND_NFSV4) {
1423                 NFSGETATTR_ATTRBIT(&attrbits);
1424                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1425                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
1426                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1427                 (void) nfsrv_putattrbit(nd, &attrbits);
1428         }
1429         error = nfscl_request(nd, dvp, p, cred, stuff);
1430         if (error)
1431                 return (error);
1432         if (nd->nd_repstat) {
1433                 /*
1434                  * When an NFSv4 Lookupp returns ENOENT, it means that
1435                  * the lookup is at the root of an fs, so return this dir.
1436                  */
1437                 if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
1438                     np = VTONFS(dvp);
1439                     nfhp = malloc(sizeof (struct nfsfh) +
1440                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1441                     nfhp->nfh_len = np->n_fhp->nfh_len;
1442                     NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1443                     *nfhpp = nfhp;
1444                     m_freem(nd->nd_mrep);
1445                     return (0);
1446                 }
1447                 if (nd->nd_flag & ND_NFSV3)
1448                     error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1449                 else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1450                     ND_NFSV4) {
1451                         /* Load the directory attributes. */
1452                         error = nfsm_loadattr(nd, dnap);
1453                         if (error == 0)
1454                                 *dattrflagp = 1;
1455                 }
1456                 goto nfsmout;
1457         }
1458         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
1459                 /* Load the directory attributes. */
1460                 error = nfsm_loadattr(nd, dnap);
1461                 if (error != 0)
1462                         goto nfsmout;
1463                 *dattrflagp = 1;
1464                 /* Skip over the Lookup and GetFH operation status values. */
1465                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1466         }
1467         error = nfsm_getfh(nd, nfhpp);
1468         if (error)
1469                 goto nfsmout;
1470
1471         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1472         if ((nd->nd_flag & ND_NFSV3) && !error)
1473                 error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1474 nfsmout:
1475         m_freem(nd->nd_mrep);
1476         if (!error && nd->nd_repstat)
1477                 error = nd->nd_repstat;
1478         return (error);
1479 }
1480
1481 /*
1482  * Do a readlink rpc.
1483  */
1484 int
1485 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1486     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1487 {
1488         u_int32_t *tl;
1489         struct nfsrv_descript nfsd, *nd = &nfsd;
1490         struct nfsnode *np = VTONFS(vp);
1491         nfsattrbit_t attrbits;
1492         int error, len, cangetattr = 1;
1493
1494         *attrflagp = 0;
1495         NFSCL_REQSTART(nd, NFSPROC_READLINK, vp);
1496         if (nd->nd_flag & ND_NFSV4) {
1497                 /*
1498                  * And do a Getattr op.
1499                  */
1500                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1501                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1502                 NFSGETATTR_ATTRBIT(&attrbits);
1503                 (void) nfsrv_putattrbit(nd, &attrbits);
1504         }
1505         error = nfscl_request(nd, vp, p, cred, stuff);
1506         if (error)
1507                 return (error);
1508         if (nd->nd_flag & ND_NFSV3)
1509                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1510         if (!nd->nd_repstat && !error) {
1511                 NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1512                 /*
1513                  * This seems weird to me, but must have been added to
1514                  * FreeBSD for some reason. The only thing I can think of
1515                  * is that there was/is some server that replies with
1516                  * more link data than it should?
1517                  */
1518                 if (len == NFS_MAXPATHLEN) {
1519                         NFSLOCKNODE(np);
1520                         if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1521                                 len = np->n_size;
1522                                 cangetattr = 0;
1523                         }
1524                         NFSUNLOCKNODE(np);
1525                 }
1526                 error = nfsm_mbufuio(nd, uiop, len);
1527                 if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1528                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1529         }
1530         if (nd->nd_repstat && !error)
1531                 error = nd->nd_repstat;
1532 nfsmout:
1533         m_freem(nd->nd_mrep);
1534         return (error);
1535 }
1536
1537 /*
1538  * Read operation.
1539  */
1540 int
1541 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1542     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1543 {
1544         int error, expireret = 0, retrycnt;
1545         u_int32_t clidrev = 0;
1546         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1547         struct nfsnode *np = VTONFS(vp);
1548         struct ucred *newcred;
1549         struct nfsfh *nfhp = NULL;
1550         nfsv4stateid_t stateid;
1551         void *lckp;
1552
1553         if (nmp->nm_clp != NULL)
1554                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1555         newcred = cred;
1556         if (NFSHASNFSV4(nmp)) {
1557                 nfhp = np->n_fhp;
1558                 newcred = NFSNEWCRED(cred);
1559         }
1560         retrycnt = 0;
1561         do {
1562                 lckp = NULL;
1563                 if (NFSHASNFSV4(nmp))
1564                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1565                             NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1566                             &lckp);
1567                 error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1568                     attrflagp, stuff);
1569                 if (error == NFSERR_OPENMODE) {
1570                         NFSLOCKMNT(nmp);
1571                         nmp->nm_state |= NFSSTA_OPENMODE;
1572                         NFSUNLOCKMNT(nmp);
1573                 }
1574                 if (error == NFSERR_STALESTATEID)
1575                         nfscl_initiate_recovery(nmp->nm_clp);
1576                 if (lckp != NULL)
1577                         nfscl_lockderef(lckp);
1578                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1579                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1580                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1581                         (void) nfs_catnap(PZERO, error, "nfs_read");
1582                 } else if ((error == NFSERR_EXPIRED ||
1583                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1584                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1585                 }
1586                 retrycnt++;
1587         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1588             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1589             error == NFSERR_BADSESSION ||
1590             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1591             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1592              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1593             (error == NFSERR_OPENMODE && retrycnt < 4));
1594         if (error && retrycnt >= 4)
1595                 error = EIO;
1596         if (NFSHASNFSV4(nmp))
1597                 NFSFREECRED(newcred);
1598         return (error);
1599 }
1600
1601 /*
1602  * The actual read RPC.
1603  */
1604 static int
1605 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1606     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1607     int *attrflagp, void *stuff)
1608 {
1609         u_int32_t *tl;
1610         int error = 0, len, retlen, tsiz, eof = 0;
1611         struct nfsrv_descript nfsd;
1612         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1613         struct nfsrv_descript *nd = &nfsd;
1614         int rsize;
1615         off_t tmp_off;
1616
1617         *attrflagp = 0;
1618         tsiz = uiop->uio_resid;
1619         tmp_off = uiop->uio_offset + tsiz;
1620         NFSLOCKMNT(nmp);
1621         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1622                 NFSUNLOCKMNT(nmp);
1623                 return (EFBIG);
1624         }
1625         rsize = nmp->nm_rsize;
1626         NFSUNLOCKMNT(nmp);
1627         nd->nd_mrep = NULL;
1628         while (tsiz > 0) {
1629                 *attrflagp = 0;
1630                 len = (tsiz > rsize) ? rsize : tsiz;
1631                 NFSCL_REQSTART(nd, NFSPROC_READ, vp);
1632                 if (nd->nd_flag & ND_NFSV4)
1633                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1634                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1635                 if (nd->nd_flag & ND_NFSV2) {
1636                         *tl++ = txdr_unsigned(uiop->uio_offset);
1637                         *tl++ = txdr_unsigned(len);
1638                         *tl = 0;
1639                 } else {
1640                         txdr_hyper(uiop->uio_offset, tl);
1641                         *(tl + 2) = txdr_unsigned(len);
1642                 }
1643                 /*
1644                  * Since I can't do a Getattr for NFSv4 for Write, there
1645                  * doesn't seem any point in doing one here, either.
1646                  * (See the comment in nfsrpc_writerpc() for more info.)
1647                  */
1648                 error = nfscl_request(nd, vp, p, cred, stuff);
1649                 if (error)
1650                         return (error);
1651                 if (nd->nd_flag & ND_NFSV3) {
1652                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1653                 } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1654                         error = nfsm_loadattr(nd, nap);
1655                         if (!error)
1656                                 *attrflagp = 1;
1657                 }
1658                 if (nd->nd_repstat || error) {
1659                         if (!error)
1660                                 error = nd->nd_repstat;
1661                         goto nfsmout;
1662                 }
1663                 if (nd->nd_flag & ND_NFSV3) {
1664                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1665                         eof = fxdr_unsigned(int, *(tl + 1));
1666                 } else if (nd->nd_flag & ND_NFSV4) {
1667                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1668                         eof = fxdr_unsigned(int, *tl);
1669                 }
1670                 NFSM_STRSIZ(retlen, len);
1671                 error = nfsm_mbufuio(nd, uiop, retlen);
1672                 if (error)
1673                         goto nfsmout;
1674                 m_freem(nd->nd_mrep);
1675                 nd->nd_mrep = NULL;
1676                 tsiz -= retlen;
1677                 if (!(nd->nd_flag & ND_NFSV2)) {
1678                         if (eof || retlen == 0)
1679                                 tsiz = 0;
1680                 } else if (retlen < len)
1681                         tsiz = 0;
1682         }
1683         return (0);
1684 nfsmout:
1685         if (nd->nd_mrep != NULL)
1686                 m_freem(nd->nd_mrep);
1687         return (error);
1688 }
1689
1690 /*
1691  * nfs write operation
1692  * When called_from_strategy != 0, it should return EIO for an error that
1693  * indicates recovery is in progress, so that the buffer will be left
1694  * dirty and be written back to the server later. If it loops around,
1695  * the recovery thread could get stuck waiting for the buffer and recovery
1696  * will then deadlock.
1697  */
1698 int
1699 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1700     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1701     void *stuff, int called_from_strategy)
1702 {
1703         int error, expireret = 0, retrycnt, nostateid;
1704         u_int32_t clidrev = 0;
1705         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1706         struct nfsnode *np = VTONFS(vp);
1707         struct ucred *newcred;
1708         struct nfsfh *nfhp = NULL;
1709         nfsv4stateid_t stateid;
1710         void *lckp;
1711
1712         *must_commit = 0;
1713         if (nmp->nm_clp != NULL)
1714                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1715         newcred = cred;
1716         if (NFSHASNFSV4(nmp)) {
1717                 newcred = NFSNEWCRED(cred);
1718                 nfhp = np->n_fhp;
1719         }
1720         retrycnt = 0;
1721         do {
1722                 lckp = NULL;
1723                 nostateid = 0;
1724                 if (NFSHASNFSV4(nmp)) {
1725                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1726                             NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1727                             &lckp);
1728                         if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1729                             stateid.other[2] == 0) {
1730                                 nostateid = 1;
1731                                 NFSCL_DEBUG(1, "stateid0 in write\n");
1732                         }
1733                 }
1734
1735                 /*
1736                  * If there is no stateid for NFSv4, it means this is an
1737                  * extraneous write after close. Basically a poorly
1738                  * implemented buffer cache. Just don't do the write.
1739                  */
1740                 if (nostateid)
1741                         error = 0;
1742                 else
1743                         error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1744                             newcred, &stateid, p, nap, attrflagp, stuff);
1745                 if (error == NFSERR_STALESTATEID)
1746                         nfscl_initiate_recovery(nmp->nm_clp);
1747                 if (lckp != NULL)
1748                         nfscl_lockderef(lckp);
1749                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1750                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1751                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1752                         (void) nfs_catnap(PZERO, error, "nfs_write");
1753                 } else if ((error == NFSERR_EXPIRED ||
1754                     error == NFSERR_BADSTATEID) && clidrev != 0) {
1755                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1756                 }
1757                 retrycnt++;
1758         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
1759             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1760               error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
1761             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1762             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1763              expireret == 0 && clidrev != 0 && retrycnt < 4));
1764         if (error != 0 && (retrycnt >= 4 ||
1765             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1766               error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
1767                 error = EIO;
1768         if (NFSHASNFSV4(nmp))
1769                 NFSFREECRED(newcred);
1770         return (error);
1771 }
1772
1773 /*
1774  * The actual write RPC.
1775  */
1776 static int
1777 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
1778     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
1779     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1780 {
1781         u_int32_t *tl;
1782         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1783         struct nfsnode *np = VTONFS(vp);
1784         int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC;
1785         int wccflag = 0, wsize;
1786         int32_t backup;
1787         struct nfsrv_descript nfsd;
1788         struct nfsrv_descript *nd = &nfsd;
1789         nfsattrbit_t attrbits;
1790         off_t tmp_off;
1791
1792         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
1793         *attrflagp = 0;
1794         tsiz = uiop->uio_resid;
1795         tmp_off = uiop->uio_offset + tsiz;
1796         NFSLOCKMNT(nmp);
1797         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1798                 NFSUNLOCKMNT(nmp);
1799                 return (EFBIG);
1800         }
1801         wsize = nmp->nm_wsize;
1802         NFSUNLOCKMNT(nmp);
1803         nd->nd_mrep = NULL;     /* NFSv2 sometimes does a write with */
1804         nd->nd_repstat = 0;     /* uio_resid == 0, so the while is not done */
1805         while (tsiz > 0) {
1806                 *attrflagp = 0;
1807                 len = (tsiz > wsize) ? wsize : tsiz;
1808                 NFSCL_REQSTART(nd, NFSPROC_WRITE, vp);
1809                 if (nd->nd_flag & ND_NFSV4) {
1810                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1811                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
1812                         txdr_hyper(uiop->uio_offset, tl);
1813                         tl += 2;
1814                         *tl++ = txdr_unsigned(*iomode);
1815                         *tl = txdr_unsigned(len);
1816                 } else if (nd->nd_flag & ND_NFSV3) {
1817                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
1818                         txdr_hyper(uiop->uio_offset, tl);
1819                         tl += 2;
1820                         *tl++ = txdr_unsigned(len);
1821                         *tl++ = txdr_unsigned(*iomode);
1822                         *tl = txdr_unsigned(len);
1823                 } else {
1824                         u_int32_t x;
1825
1826                         NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1827                         /*
1828                          * Not sure why someone changed this, since the
1829                          * RFC clearly states that "beginoffset" and
1830                          * "totalcount" are ignored, but it wouldn't
1831                          * surprise me if there's a busted server out there.
1832                          */
1833                         /* Set both "begin" and "current" to non-garbage. */
1834                         x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1835                         *tl++ = x;      /* "begin offset" */
1836                         *tl++ = x;      /* "current offset" */
1837                         x = txdr_unsigned(len);
1838                         *tl++ = x;      /* total to this offset */
1839                         *tl = x;        /* size of this write */
1840                 }
1841                 nfsm_uiombuf(nd, uiop, len);
1842                 /*
1843                  * Although it is tempting to do a normal Getattr Op in the
1844                  * NFSv4 compound, the result can be a nearly hung client
1845                  * system if the Getattr asks for Owner and/or OwnerGroup.
1846                  * It occurs when the client can't map either the Owner or
1847                  * Owner_group name in the Getattr reply to a uid/gid. When
1848                  * there is a cache miss, the kernel does an upcall to the
1849                  * nfsuserd. Then, it can try and read the local /etc/passwd
1850                  * or /etc/group file. It can then block in getnewbuf(),
1851                  * waiting for dirty writes to be pushed to the NFS server.
1852                  * The only reason this doesn't result in a complete
1853                  * deadlock, is that the upcall times out and allows
1854                  * the write to complete. However, progress is so slow
1855                  * that it might just as well be deadlocked.
1856                  * As such, we get the rest of the attributes, but not
1857                  * Owner or Owner_group.
1858                  * nb: nfscl_loadattrcache() needs to be told that these
1859                  *     partial attributes from a write rpc are being
1860                  *     passed in, via a argument flag.
1861                  */
1862                 if (nd->nd_flag & ND_NFSV4) {
1863                         NFSWRITEGETATTR_ATTRBIT(&attrbits);
1864                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1865                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
1866                         (void) nfsrv_putattrbit(nd, &attrbits);
1867                 }
1868                 error = nfscl_request(nd, vp, p, cred, stuff);
1869                 if (error)
1870                         return (error);
1871                 if (nd->nd_repstat) {
1872                         /*
1873                          * In case the rpc gets retried, roll
1874                          * the uio fileds changed by nfsm_uiombuf()
1875                          * back.
1876                          */
1877                         uiop->uio_offset -= len;
1878                         uiop->uio_resid += len;
1879                         uiop->uio_iov->iov_base =
1880                             (char *)uiop->uio_iov->iov_base - len;
1881                         uiop->uio_iov->iov_len += len;
1882                 }
1883                 if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1884                         error = nfscl_wcc_data(nd, vp, nap, attrflagp,
1885                             &wccflag, stuff);
1886                         if (error)
1887                                 goto nfsmout;
1888                 }
1889                 if (!nd->nd_repstat) {
1890                         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1891                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
1892                                         + NFSX_VERF);
1893                                 rlen = fxdr_unsigned(int, *tl++);
1894                                 if (rlen == 0) {
1895                                         error = NFSERR_IO;
1896                                         goto nfsmout;
1897                                 } else if (rlen < len) {
1898                                         backup = len - rlen;
1899                                         uiop->uio_iov->iov_base =
1900                                             (char *)uiop->uio_iov->iov_base -
1901                                             backup;
1902                                         uiop->uio_iov->iov_len += backup;
1903                                         uiop->uio_offset -= backup;
1904                                         uiop->uio_resid += backup;
1905                                         len = rlen;
1906                                 }
1907                                 commit = fxdr_unsigned(int, *tl++);
1908
1909                                 /*
1910                                  * Return the lowest commitment level
1911                                  * obtained by any of the RPCs.
1912                                  */
1913                                 if (committed == NFSWRITE_FILESYNC)
1914                                         committed = commit;
1915                                 else if (committed == NFSWRITE_DATASYNC &&
1916                                         commit == NFSWRITE_UNSTABLE)
1917                                         committed = commit;
1918                                 NFSLOCKMNT(nmp);
1919                                 if (!NFSHASWRITEVERF(nmp)) {
1920                                         NFSBCOPY((caddr_t)tl,
1921                                             (caddr_t)&nmp->nm_verf[0],
1922                                             NFSX_VERF);
1923                                         NFSSETWRITEVERF(nmp);
1924                                 } else if (NFSBCMP(tl, nmp->nm_verf,
1925                                     NFSX_VERF)) {
1926                                         *must_commit = 1;
1927                                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
1928                                 }
1929                                 NFSUNLOCKMNT(nmp);
1930                         }
1931                         if (nd->nd_flag & ND_NFSV4)
1932                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1933                         if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
1934                                 error = nfsm_loadattr(nd, nap);
1935                                 if (!error)
1936                                         *attrflagp = NFS_LATTR_NOSHRINK;
1937                         }
1938                 } else {
1939                         error = nd->nd_repstat;
1940                 }
1941                 if (error)
1942                         goto nfsmout;
1943                 NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
1944                 m_freem(nd->nd_mrep);
1945                 nd->nd_mrep = NULL;
1946                 tsiz -= len;
1947         }
1948 nfsmout:
1949         if (nd->nd_mrep != NULL)
1950                 m_freem(nd->nd_mrep);
1951         *iomode = committed;
1952         if (nd->nd_repstat && !error)
1953                 error = nd->nd_repstat;
1954         return (error);
1955 }
1956
1957 /*
1958  * nfs mknod rpc
1959  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1960  * mode set to specify the file type and the size field for rdev.
1961  */
1962 int
1963 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1964     u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
1965     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1966     int *attrflagp, int *dattrflagp, void *dstuff)
1967 {
1968         u_int32_t *tl;
1969         int error = 0;
1970         struct nfsrv_descript nfsd, *nd = &nfsd;
1971         nfsattrbit_t attrbits;
1972
1973         *nfhpp = NULL;
1974         *attrflagp = 0;
1975         *dattrflagp = 0;
1976         if (namelen > NFS_MAXNAMLEN)
1977                 return (ENAMETOOLONG);
1978         NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp);
1979         if (nd->nd_flag & ND_NFSV4) {
1980                 if (vtyp == VBLK || vtyp == VCHR) {
1981                         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1982                         *tl++ = vtonfsv34_type(vtyp);
1983                         *tl++ = txdr_unsigned(NFSMAJOR(rdev));
1984                         *tl = txdr_unsigned(NFSMINOR(rdev));
1985                 } else {
1986                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1987                         *tl = vtonfsv34_type(vtyp);
1988                 }
1989         }
1990         (void) nfsm_strtom(nd, name, namelen);
1991         if (nd->nd_flag & ND_NFSV3) {
1992                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1993                 *tl = vtonfsv34_type(vtyp);
1994         }
1995         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1996                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
1997         if ((nd->nd_flag & ND_NFSV3) &&
1998             (vtyp == VCHR || vtyp == VBLK)) {
1999                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2000                 *tl++ = txdr_unsigned(NFSMAJOR(rdev));
2001                 *tl = txdr_unsigned(NFSMINOR(rdev));
2002         }
2003         if (nd->nd_flag & ND_NFSV4) {
2004                 NFSGETATTR_ATTRBIT(&attrbits);
2005                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2006                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2007                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2008                 (void) nfsrv_putattrbit(nd, &attrbits);
2009         }
2010         if (nd->nd_flag & ND_NFSV2)
2011                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
2012         error = nfscl_request(nd, dvp, p, cred, dstuff);
2013         if (error)
2014                 return (error);
2015         if (nd->nd_flag & ND_NFSV4)
2016                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2017         if (!nd->nd_repstat) {
2018                 if (nd->nd_flag & ND_NFSV4) {
2019                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2020                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2021                         if (error)
2022                                 goto nfsmout;
2023                 }
2024                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2025                 if (error)
2026                         goto nfsmout;
2027         }
2028         if (nd->nd_flag & ND_NFSV3)
2029                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2030         if (!error && nd->nd_repstat)
2031                 error = nd->nd_repstat;
2032 nfsmout:
2033         m_freem(nd->nd_mrep);
2034         return (error);
2035 }
2036
2037 /*
2038  * nfs file create call
2039  * Mostly just call the appropriate routine. (I separated out v4, so that
2040  * error recovery wouldn't be as difficult.)
2041  */
2042 int
2043 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2044     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2045     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2046     int *attrflagp, int *dattrflagp, void *dstuff)
2047 {
2048         int error = 0, newone, expireret = 0, retrycnt, unlocked;
2049         struct nfsclowner *owp;
2050         struct nfscldeleg *dp;
2051         struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2052         u_int32_t clidrev;
2053
2054         if (NFSHASNFSV4(nmp)) {
2055             retrycnt = 0;
2056             do {
2057                 dp = NULL;
2058                 error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
2059                     NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
2060                     NULL, 1);
2061                 if (error)
2062                         return (error);
2063                 if (nmp->nm_clp != NULL)
2064                         clidrev = nmp->nm_clp->nfsc_clientidrev;
2065                 else
2066                         clidrev = 0;
2067                 if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
2068                     nfs_numnfscbd == 0 || retrycnt > 0)
2069                         error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
2070                           fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2071                           attrflagp, dattrflagp, dstuff, &unlocked);
2072                 else
2073                         error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
2074                           cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2075                           attrflagp, dattrflagp, dstuff, &unlocked);
2076                 /*
2077                  * There is no need to invalidate cached attributes here,
2078                  * since new post-delegation issue attributes are always
2079                  * returned by nfsrpc_createv4() and these will update the
2080                  * attribute cache.
2081                  */
2082                 if (dp != NULL)
2083                         (void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
2084                             (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
2085                 nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
2086                 if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2087                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2088                     error == NFSERR_BADSESSION) {
2089                         (void) nfs_catnap(PZERO, error, "nfs_open");
2090                 } else if ((error == NFSERR_EXPIRED ||
2091                     error == NFSERR_BADSTATEID) && clidrev != 0) {
2092                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2093                         retrycnt++;
2094                 }
2095             } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2096                 error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2097                 error == NFSERR_BADSESSION ||
2098                 ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2099                  expireret == 0 && clidrev != 0 && retrycnt < 4));
2100             if (error && retrycnt >= 4)
2101                     error = EIO;
2102         } else {
2103                 error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
2104                     fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
2105                     dstuff);
2106         }
2107         return (error);
2108 }
2109
2110 /*
2111  * The create rpc for v2 and 3.
2112  */
2113 static int
2114 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2115     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2116     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2117     int *attrflagp, int *dattrflagp, void *dstuff)
2118 {
2119         u_int32_t *tl;
2120         int error = 0;
2121         struct nfsrv_descript nfsd, *nd = &nfsd;
2122
2123         *nfhpp = NULL;
2124         *attrflagp = 0;
2125         *dattrflagp = 0;
2126         if (namelen > NFS_MAXNAMLEN)
2127                 return (ENAMETOOLONG);
2128         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2129         (void) nfsm_strtom(nd, name, namelen);
2130         if (nd->nd_flag & ND_NFSV3) {
2131                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2132                 if (fmode & O_EXCL) {
2133                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2134                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2135                         *tl++ = cverf.lval[0];
2136                         *tl = cverf.lval[1];
2137                 } else {
2138                         *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2139                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
2140                 }
2141         } else {
2142                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2143         }
2144         error = nfscl_request(nd, dvp, p, cred, dstuff);
2145         if (error)
2146                 return (error);
2147         if (nd->nd_repstat == 0) {
2148                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2149                 if (error)
2150                         goto nfsmout;
2151         }
2152         if (nd->nd_flag & ND_NFSV3)
2153                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2154         if (nd->nd_repstat != 0 && error == 0)
2155                 error = nd->nd_repstat;
2156 nfsmout:
2157         m_freem(nd->nd_mrep);
2158         return (error);
2159 }
2160
2161 static int
2162 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2163     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2164     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2165     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2166     int *dattrflagp, void *dstuff, int *unlockedp)
2167 {
2168         u_int32_t *tl;
2169         int error = 0, deleg, newone, ret, acesize, limitby;
2170         struct nfsrv_descript nfsd, *nd = &nfsd;
2171         struct nfsclopen *op;
2172         struct nfscldeleg *dp = NULL;
2173         struct nfsnode *np;
2174         struct nfsfh *nfhp;
2175         nfsattrbit_t attrbits;
2176         nfsv4stateid_t stateid;
2177         u_int32_t rflags;
2178         struct nfsmount *nmp;
2179         struct nfsclsession *tsep;
2180
2181         nmp = VFSTONFS(dvp->v_mount);
2182         np = VTONFS(dvp);
2183         *unlockedp = 0;
2184         *nfhpp = NULL;
2185         *dpp = NULL;
2186         *attrflagp = 0;
2187         *dattrflagp = 0;
2188         if (namelen > NFS_MAXNAMLEN)
2189                 return (ENAMETOOLONG);
2190         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
2191         /*
2192          * For V4, this is actually an Open op.
2193          */
2194         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2195         *tl++ = txdr_unsigned(owp->nfsow_seqid);
2196         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2197             NFSV4OPEN_ACCESSREAD);
2198         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2199         tsep = nfsmnt_mdssession(nmp);
2200         *tl++ = tsep->nfsess_clientid.lval[0];
2201         *tl = tsep->nfsess_clientid.lval[1];
2202         (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
2203         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2204         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2205         if (fmode & O_EXCL) {
2206                 if (NFSHASNFSV4N(nmp)) {
2207                         if (NFSHASSESSPERSIST(nmp)) {
2208                                 /* Use GUARDED for persistent sessions. */
2209                                 *tl = txdr_unsigned(NFSCREATE_GUARDED);
2210                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2211                         } else {
2212                                 /* Otherwise, use EXCLUSIVE4_1. */
2213                                 *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2214                                 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2215                                 *tl++ = cverf.lval[0];
2216                                 *tl = cverf.lval[1];
2217                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2218                         }
2219                 } else {
2220                         /* NFSv4.0 */
2221                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2222                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2223                         *tl++ = cverf.lval[0];
2224                         *tl = cverf.lval[1];
2225                 }
2226         } else {
2227                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2228                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2229         }
2230         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2231         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2232         (void) nfsm_strtom(nd, name, namelen);
2233         /* Get the new file's handle and attributes. */
2234         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2235         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2236         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2237         NFSGETATTR_ATTRBIT(&attrbits);
2238         (void) nfsrv_putattrbit(nd, &attrbits);
2239         /* Get the directory's post-op attributes. */
2240         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2241         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2242         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2243         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2244         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2245         (void) nfsrv_putattrbit(nd, &attrbits);
2246         error = nfscl_request(nd, dvp, p, cred, dstuff);
2247         if (error)
2248                 return (error);
2249         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2250         if (nd->nd_repstat == 0) {
2251                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2252                     6 * NFSX_UNSIGNED);
2253                 stateid.seqid = *tl++;
2254                 stateid.other[0] = *tl++;
2255                 stateid.other[1] = *tl++;
2256                 stateid.other[2] = *tl;
2257                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
2258                 (void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2259                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2260                 deleg = fxdr_unsigned(int, *tl);
2261                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
2262                     deleg == NFSV4OPEN_DELEGATEWRITE) {
2263                         if (!(owp->nfsow_clp->nfsc_flags &
2264                               NFSCLFLAGS_FIRSTDELEG))
2265                                 owp->nfsow_clp->nfsc_flags |=
2266                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
2267                         dp = malloc(
2268                             sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2269                             M_NFSCLDELEG, M_WAITOK);
2270                         LIST_INIT(&dp->nfsdl_owner);
2271                         LIST_INIT(&dp->nfsdl_lock);
2272                         dp->nfsdl_clp = owp->nfsow_clp;
2273                         newnfs_copyincred(cred, &dp->nfsdl_cred);
2274                         nfscl_lockinit(&dp->nfsdl_rwlock);
2275                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2276                             NFSX_UNSIGNED);
2277                         dp->nfsdl_stateid.seqid = *tl++;
2278                         dp->nfsdl_stateid.other[0] = *tl++;
2279                         dp->nfsdl_stateid.other[1] = *tl++;
2280                         dp->nfsdl_stateid.other[2] = *tl++;
2281                         ret = fxdr_unsigned(int, *tl);
2282                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2283                                 dp->nfsdl_flags = NFSCLDL_WRITE;
2284                                 /*
2285                                  * Indicates how much the file can grow.
2286                                  */
2287                                 NFSM_DISSECT(tl, u_int32_t *,
2288                                     3 * NFSX_UNSIGNED);
2289                                 limitby = fxdr_unsigned(int, *tl++);
2290                                 switch (limitby) {
2291                                 case NFSV4OPEN_LIMITSIZE:
2292                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
2293                                         break;
2294                                 case NFSV4OPEN_LIMITBLOCKS:
2295                                         dp->nfsdl_sizelimit =
2296                                             fxdr_unsigned(u_int64_t, *tl++);
2297                                         dp->nfsdl_sizelimit *=
2298                                             fxdr_unsigned(u_int64_t, *tl);
2299                                         break;
2300                                 default:
2301                                         error = NFSERR_BADXDR;
2302                                         goto nfsmout;
2303                                 }
2304                         } else {
2305                                 dp->nfsdl_flags = NFSCLDL_READ;
2306                         }
2307                         if (ret)
2308                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
2309                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
2310                             &acesize, p);
2311                         if (error)
2312                                 goto nfsmout;
2313                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
2314                         error = NFSERR_BADXDR;
2315                         goto nfsmout;
2316                 }
2317                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2318                 if (error)
2319                         goto nfsmout;
2320                 /* Get rid of the PutFH and Getattr status values. */
2321                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2322                 /* Load the directory attributes. */
2323                 error = nfsm_loadattr(nd, dnap);
2324                 if (error)
2325                         goto nfsmout;
2326                 *dattrflagp = 1;
2327                 if (dp != NULL && *attrflagp) {
2328                         dp->nfsdl_change = nnap->na_filerev;
2329                         dp->nfsdl_modtime = nnap->na_mtime;
2330                         dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2331                 }
2332                 /*
2333                  * We can now complete the Open state.
2334                  */
2335                 nfhp = *nfhpp;
2336                 if (dp != NULL) {
2337                         dp->nfsdl_fhlen = nfhp->nfh_len;
2338                         NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2339                 }
2340                 /*
2341                  * Get an Open structure that will be
2342                  * attached to the OpenOwner, acquired already.
2343                  */
2344                 error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
2345                     (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2346                     cred, p, NULL, &op, &newone, NULL, 0);
2347                 if (error)
2348                         goto nfsmout;
2349                 op->nfso_stateid = stateid;
2350                 newnfs_copyincred(cred, &op->nfso_cred);
2351                 if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2352                     do {
2353                         ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2354                             nfhp->nfh_len, op, cred, p);
2355                         if (ret == NFSERR_DELAY)
2356                             (void) nfs_catnap(PZERO, ret, "nfs_create");
2357                     } while (ret == NFSERR_DELAY);
2358                     error = ret;
2359                 }
2360
2361                 /*
2362                  * If the server is handing out delegations, but we didn't
2363                  * get one because an OpenConfirm was required, try the
2364                  * Open again, to get a delegation. This is a harmless no-op,
2365                  * from a server's point of view.
2366                  */
2367                 if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2368                     (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2369                     !error && dp == NULL) {
2370                     do {
2371                         ret = nfsrpc_openrpc(VFSTONFS(dvp->v_mount), dvp,
2372                             np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2373                             nfhp->nfh_fh, nfhp->nfh_len,
2374                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2375                             name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2376                         if (ret == NFSERR_DELAY)
2377                             (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2378                     } while (ret == NFSERR_DELAY);
2379                     if (ret) {
2380                         if (dp != NULL) {
2381                                 free(dp, M_NFSCLDELEG);
2382                                 dp = NULL;
2383                         }
2384                         if (ret == NFSERR_STALECLIENTID ||
2385                             ret == NFSERR_STALEDONTRECOVER ||
2386                             ret == NFSERR_BADSESSION)
2387                                 error = ret;
2388                     }
2389                 }
2390                 nfscl_openrelease(nmp, op, error, newone);
2391                 *unlockedp = 1;
2392         }
2393         if (nd->nd_repstat != 0 && error == 0)
2394                 error = nd->nd_repstat;
2395         if (error == NFSERR_STALECLIENTID)
2396                 nfscl_initiate_recovery(owp->nfsow_clp);
2397 nfsmout:
2398         if (!error)
2399                 *dpp = dp;
2400         else if (dp != NULL)
2401                 free(dp, M_NFSCLDELEG);
2402         m_freem(nd->nd_mrep);
2403         return (error);
2404 }
2405
2406 /*
2407  * Nfs remove rpc
2408  */
2409 int
2410 nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2411     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp,
2412     void *dstuff)
2413 {
2414         u_int32_t *tl;
2415         struct nfsrv_descript nfsd, *nd = &nfsd;
2416         struct nfsnode *np;
2417         struct nfsmount *nmp;
2418         nfsv4stateid_t dstateid;
2419         int error, ret = 0, i;
2420
2421         *dattrflagp = 0;
2422         if (namelen > NFS_MAXNAMLEN)
2423                 return (ENAMETOOLONG);
2424         nmp = VFSTONFS(dvp->v_mount);
2425 tryagain:
2426         if (NFSHASNFSV4(nmp) && ret == 0) {
2427                 ret = nfscl_removedeleg(vp, p, &dstateid);
2428                 if (ret == 1) {
2429                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp);
2430                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2431                             NFSX_UNSIGNED);
2432                         if (NFSHASNFSV4N(nmp))
2433                                 *tl++ = 0;
2434                         else
2435                                 *tl++ = dstateid.seqid;
2436                         *tl++ = dstateid.other[0];
2437                         *tl++ = dstateid.other[1];
2438                         *tl++ = dstateid.other[2];
2439                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2440                         np = VTONFS(dvp);
2441                         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2442                             np->n_fhp->nfh_len, 0);
2443                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2444                         *tl = txdr_unsigned(NFSV4OP_REMOVE);
2445                 }
2446         } else {
2447                 ret = 0;
2448         }
2449         if (ret == 0)
2450                 NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp);
2451         (void) nfsm_strtom(nd, name, namelen);
2452         error = nfscl_request(nd, dvp, p, cred, dstuff);
2453         if (error)
2454                 return (error);
2455         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2456                 /* For NFSv4, parse out any Delereturn replies. */
2457                 if (ret > 0 && nd->nd_repstat != 0 &&
2458                     (nd->nd_flag & ND_NOMOREDATA)) {
2459                         /*
2460                          * If the Delegreturn failed, try again without
2461                          * it. The server will Recall, as required.
2462                          */
2463                         m_freem(nd->nd_mrep);
2464                         goto tryagain;
2465                 }
2466                 for (i = 0; i < (ret * 2); i++) {
2467                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2468                             ND_NFSV4) {
2469                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2470                             if (*(tl + 1))
2471                                 nd->nd_flag |= ND_NOMOREDATA;
2472                         }
2473                 }
2474                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2475         }
2476         if (nd->nd_repstat && !error)
2477                 error = nd->nd_repstat;
2478 nfsmout:
2479         m_freem(nd->nd_mrep);
2480         return (error);
2481 }
2482
2483 /*
2484  * Do an nfs rename rpc.
2485  */
2486 int
2487 nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
2488     vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
2489     NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
2490     int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff)
2491 {
2492         u_int32_t *tl;
2493         struct nfsrv_descript nfsd, *nd = &nfsd;
2494         struct nfsmount *nmp;
2495         struct nfsnode *np;
2496         nfsattrbit_t attrbits;
2497         nfsv4stateid_t fdstateid, tdstateid;
2498         int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
2499
2500         *fattrflagp = 0;
2501         *tattrflagp = 0;
2502         nmp = VFSTONFS(fdvp->v_mount);
2503         if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
2504                 return (ENAMETOOLONG);
2505 tryagain:
2506         if (NFSHASNFSV4(nmp) && ret == 0) {
2507                 ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
2508                     &tdstateid, &gottd, p);
2509                 if (gotfd && gottd) {
2510                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp);
2511                 } else if (gotfd) {
2512                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp);
2513                 } else if (gottd) {
2514                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp);
2515                 }
2516                 if (gotfd) {
2517                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2518                         if (NFSHASNFSV4N(nmp))
2519                                 *tl++ = 0;
2520                         else
2521                                 *tl++ = fdstateid.seqid;
2522                         *tl++ = fdstateid.other[0];
2523                         *tl++ = fdstateid.other[1];
2524                         *tl = fdstateid.other[2];
2525                         if (gottd) {
2526                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2527                                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2528                                 np = VTONFS(tvp);
2529                                 (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2530                                     np->n_fhp->nfh_len, 0);
2531                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2532                                 *tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
2533                         }
2534                 }
2535                 if (gottd) {
2536                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2537                         if (NFSHASNFSV4N(nmp))
2538                                 *tl++ = 0;
2539                         else
2540                                 *tl++ = tdstateid.seqid;
2541                         *tl++ = tdstateid.other[0];
2542                         *tl++ = tdstateid.other[1];
2543                         *tl = tdstateid.other[2];
2544                 }
2545                 if (ret > 0) {
2546                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2547                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2548                         np = VTONFS(fdvp);
2549                         (void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2550                             np->n_fhp->nfh_len, 0);
2551                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2552                         *tl = txdr_unsigned(NFSV4OP_SAVEFH);
2553                 }
2554         } else {
2555                 ret = 0;
2556         }
2557         if (ret == 0)
2558                 NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp);
2559         if (nd->nd_flag & ND_NFSV4) {
2560                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2561                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2562                 NFSWCCATTR_ATTRBIT(&attrbits);
2563                 (void) nfsrv_putattrbit(nd, &attrbits);
2564                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2565                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2566                 (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2567                     VTONFS(tdvp)->n_fhp->nfh_len, 0);
2568                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2569                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2570                 (void) nfsrv_putattrbit(nd, &attrbits);
2571                 nd->nd_flag |= ND_V4WCCATTR;
2572                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2573                 *tl = txdr_unsigned(NFSV4OP_RENAME);
2574         }
2575         (void) nfsm_strtom(nd, fnameptr, fnamelen);
2576         if (!(nd->nd_flag & ND_NFSV4))
2577                 (void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2578                         VTONFS(tdvp)->n_fhp->nfh_len, 0);
2579         (void) nfsm_strtom(nd, tnameptr, tnamelen);
2580         error = nfscl_request(nd, fdvp, p, cred, fstuff);
2581         if (error)
2582                 return (error);
2583         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2584                 /* For NFSv4, parse out any Delereturn replies. */
2585                 if (ret > 0 && nd->nd_repstat != 0 &&
2586                     (nd->nd_flag & ND_NOMOREDATA)) {
2587                         /*
2588                          * If the Delegreturn failed, try again without
2589                          * it. The server will Recall, as required.
2590                          */
2591                         m_freem(nd->nd_mrep);
2592                         goto tryagain;
2593                 }
2594                 for (i = 0; i < (ret * 2); i++) {
2595                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2596                             ND_NFSV4) {
2597                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2598                             if (*(tl + 1)) {
2599                                 if (i == 0 && ret > 1) {
2600                                     /*
2601                                      * If the Delegreturn failed, try again
2602                                      * without it. The server will Recall, as
2603                                      * required.
2604                                      * If ret > 1, the first iteration of this
2605                                      * loop is the second DelegReturn result.
2606                                      */
2607                                     m_freem(nd->nd_mrep);
2608                                     goto tryagain;
2609                                 } else {
2610                                     nd->nd_flag |= ND_NOMOREDATA;
2611                                 }
2612                             }
2613                         }
2614                 }
2615                 /* Now, the first wcc attribute reply. */
2616                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2617                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2618                         if (*(tl + 1))
2619                                 nd->nd_flag |= ND_NOMOREDATA;
2620                 }
2621                 error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL,
2622                     fstuff);
2623                 /* and the second wcc attribute reply. */
2624                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
2625                     !error) {
2626                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2627                         if (*(tl + 1))
2628                                 nd->nd_flag |= ND_NOMOREDATA;
2629                 }
2630                 if (!error)
2631                         error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
2632                             NULL, tstuff);
2633         }
2634         if (nd->nd_repstat && !error)
2635                 error = nd->nd_repstat;
2636 nfsmout:
2637         m_freem(nd->nd_mrep);
2638         return (error);
2639 }
2640
2641 /*
2642  * nfs hard link create rpc
2643  */
2644 int
2645 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
2646     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2647     struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff)
2648 {
2649         u_int32_t *tl;
2650         struct nfsrv_descript nfsd, *nd = &nfsd;
2651         nfsattrbit_t attrbits;
2652         int error = 0;
2653
2654         *attrflagp = 0;
2655         *dattrflagp = 0;
2656         if (namelen > NFS_MAXNAMLEN)
2657                 return (ENAMETOOLONG);
2658         NFSCL_REQSTART(nd, NFSPROC_LINK, vp);
2659         if (nd->nd_flag & ND_NFSV4) {
2660                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2661                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2662         }
2663         (void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh,
2664                 VTONFS(dvp)->n_fhp->nfh_len, 0);
2665         if (nd->nd_flag & ND_NFSV4) {
2666                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2667                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2668                 NFSWCCATTR_ATTRBIT(&attrbits);
2669                 (void) nfsrv_putattrbit(nd, &attrbits);
2670                 nd->nd_flag |= ND_V4WCCATTR;
2671                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2672                 *tl = txdr_unsigned(NFSV4OP_LINK);
2673         }
2674         (void) nfsm_strtom(nd, name, namelen);
2675         error = nfscl_request(nd, vp, p, cred, dstuff);
2676         if (error)
2677                 return (error);
2678         if (nd->nd_flag & ND_NFSV3) {
2679                 error = nfscl_postop_attr(nd, nap, attrflagp, dstuff);
2680                 if (!error)
2681                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2682                             NULL, dstuff);
2683         } else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2684                 /*
2685                  * First, parse out the PutFH and Getattr result.
2686                  */
2687                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2688                 if (!(*(tl + 1)))
2689                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2690                 if (*(tl + 1))
2691                         nd->nd_flag |= ND_NOMOREDATA;
2692                 /*
2693                  * Get the pre-op attributes.
2694                  */
2695                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2696         }
2697         if (nd->nd_repstat && !error)
2698                 error = nd->nd_repstat;
2699 nfsmout:
2700         m_freem(nd->nd_mrep);
2701         return (error);
2702 }
2703
2704 /*
2705  * nfs symbolic link create rpc
2706  */
2707 int
2708 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, const char *target,
2709     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2710     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2711     int *dattrflagp, void *dstuff)
2712 {
2713         u_int32_t *tl;
2714         struct nfsrv_descript nfsd, *nd = &nfsd;
2715         struct nfsmount *nmp;
2716         int slen, error = 0;
2717
2718         *nfhpp = NULL;
2719         *attrflagp = 0;
2720         *dattrflagp = 0;
2721         nmp = VFSTONFS(dvp->v_mount);
2722         slen = strlen(target);
2723         if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
2724                 return (ENAMETOOLONG);
2725         NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp);
2726         if (nd->nd_flag & ND_NFSV4) {
2727                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2728                 *tl = txdr_unsigned(NFLNK);
2729                 (void) nfsm_strtom(nd, target, slen);
2730         }
2731         (void) nfsm_strtom(nd, name, namelen);
2732         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2733                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2734         if (!(nd->nd_flag & ND_NFSV4))
2735                 (void) nfsm_strtom(nd, target, slen);
2736         if (nd->nd_flag & ND_NFSV2)
2737                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2738         error = nfscl_request(nd, dvp, p, cred, dstuff);
2739         if (error)
2740                 return (error);
2741         if (nd->nd_flag & ND_NFSV4)
2742                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2743         if ((nd->nd_flag & ND_NFSV3) && !error) {
2744                 if (!nd->nd_repstat)
2745                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2746                 if (!error)
2747                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2748                             NULL, dstuff);
2749         }
2750         if (nd->nd_repstat && !error)
2751                 error = nd->nd_repstat;
2752         m_freem(nd->nd_mrep);
2753         /*
2754          * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2755          * Only do this if vfs.nfs.ignore_eexist is set.
2756          * Never do this for NFSv4.1 or later minor versions, since sessions
2757          * should guarantee "exactly once" RPC semantics.
2758          */
2759         if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2760             nmp->nm_minorvers == 0))
2761                 error = 0;
2762         return (error);
2763 }
2764
2765 /*
2766  * nfs make dir rpc
2767  */
2768 int
2769 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2770     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2771     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2772     int *dattrflagp, void *dstuff)
2773 {
2774         u_int32_t *tl;
2775         struct nfsrv_descript nfsd, *nd = &nfsd;
2776         nfsattrbit_t attrbits;
2777         int error = 0;
2778         struct nfsfh *fhp;
2779         struct nfsmount *nmp;
2780
2781         *nfhpp = NULL;
2782         *attrflagp = 0;
2783         *dattrflagp = 0;
2784         nmp = VFSTONFS(dvp->v_mount);
2785         fhp = VTONFS(dvp)->n_fhp;
2786         if (namelen > NFS_MAXNAMLEN)
2787                 return (ENAMETOOLONG);
2788         NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp);
2789         if (nd->nd_flag & ND_NFSV4) {
2790                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2791                 *tl = txdr_unsigned(NFDIR);
2792         }
2793         (void) nfsm_strtom(nd, name, namelen);
2794         nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2795         if (nd->nd_flag & ND_NFSV4) {
2796                 NFSGETATTR_ATTRBIT(&attrbits);
2797                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2798                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2799                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2800                 (void) nfsrv_putattrbit(nd, &attrbits);
2801                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2802                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2803                 (void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0);
2804                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2805                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2806                 (void) nfsrv_putattrbit(nd, &attrbits);
2807         }
2808         error = nfscl_request(nd, dvp, p, cred, dstuff);
2809         if (error)
2810                 return (error);
2811         if (nd->nd_flag & ND_NFSV4)
2812                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2813         if (!nd->nd_repstat && !error) {
2814                 if (nd->nd_flag & ND_NFSV4) {
2815                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2816                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2817                 }
2818                 if (!error)
2819                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2820                 if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
2821                         /* Get rid of the PutFH and Getattr status values. */
2822                         NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2823                         /* Load the directory attributes. */
2824                         error = nfsm_loadattr(nd, dnap);
2825                         if (error == 0)
2826                                 *dattrflagp = 1;
2827                 }
2828         }
2829         if ((nd->nd_flag & ND_NFSV3) && !error)
2830                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2831         if (nd->nd_repstat && !error)
2832                 error = nd->nd_repstat;
2833 nfsmout:
2834         m_freem(nd->nd_mrep);
2835         /*
2836          * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2837          * Only do this if vfs.nfs.ignore_eexist is set.
2838          * Never do this for NFSv4.1 or later minor versions, since sessions
2839          * should guarantee "exactly once" RPC semantics.
2840          */
2841         if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2842             nmp->nm_minorvers == 0))
2843                 error = 0;
2844         return (error);
2845 }
2846
2847 /*
2848  * nfs remove directory call
2849  */
2850 int
2851 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
2852     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff)
2853 {
2854         struct nfsrv_descript nfsd, *nd = &nfsd;
2855         int error = 0;
2856
2857         *dattrflagp = 0;
2858         if (namelen > NFS_MAXNAMLEN)
2859                 return (ENAMETOOLONG);
2860         NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp);
2861         (void) nfsm_strtom(nd, name, namelen);
2862         error = nfscl_request(nd, dvp, p, cred, dstuff);
2863         if (error)
2864                 return (error);
2865         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2866                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2867         if (nd->nd_repstat && !error)
2868                 error = nd->nd_repstat;
2869         m_freem(nd->nd_mrep);
2870         /*
2871          * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2872          */
2873         if (error == ENOENT)
2874                 error = 0;
2875         return (error);
2876 }
2877
2878 /*
2879  * Readdir rpc.
2880  * Always returns with either uio_resid unchanged, if you are at the
2881  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
2882  * filled in.
2883  * I felt this would allow caching of directory blocks more easily
2884  * than returning a partially filled block.
2885  * Directory offset cookies:
2886  * Oh my, what to do with them...
2887  * I can think of three ways to deal with them:
2888  * 1 - have the layer above these RPCs maintain a map between logical
2889  *     directory byte offsets and the NFS directory offset cookies
2890  * 2 - pass the opaque directory offset cookies up into userland
2891  *     and let the libc functions deal with them, via the system call
2892  * 3 - return them to userland in the "struct dirent", so future versions
2893  *     of libc can use them and do whatever is necessary to make things work
2894  *     above these rpc calls, in the meantime
2895  * For now, I do #3 by "hiding" the directory offset cookies after the
2896  * d_name field in struct dirent. This is space inside d_reclen that
2897  * will be ignored by anything that doesn't know about them.
2898  * The directory offset cookies are filled in as the last 8 bytes of
2899  * each directory entry, after d_name. Someday, the userland libc
2900  * functions may be able to use these. In the meantime, it satisfies
2901  * OpenBSD's requirements for cookies being returned.
2902  * It expects the directory offset cookie for the read to be in uio_offset
2903  * and returns the one for the next entry after this directory block in
2904  * there, as well.
      *
      * Arguments:
      *   vp        - vnode of the directory being read.
      *   uiop      - destination buffer; must describe a single iovec whose
      *               uio_resid is a multiple of DIRBLKSIZ (asserted below).
      *               For NFSv4, uio_offset == 0 causes "." and ".." entries
      *               to be hand crafted first.
      *   cookiep   - on entry, the cookie sent in the first Readdir request;
      *               updated to the cookie of each entry copied out.
      *   nap, attrflagp - handed to nfscl_postop_attr() to receive the
      *               directory's attributes; *attrflagp is cleared first.
      *   eofp      - if not NULL, set according to the rules spelled out
      *               near the end of the function.
      *   cred, p, stuff - passed through to the request/attribute helpers.
      * Returns 0 on success, otherwise an error number.
2905  */
2906 int
2907 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
2908     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
2909     int *eofp, void *stuff)
2910 {
2911         int len, left;
2912         struct dirent *dp = NULL;
2913         u_int32_t *tl;
2914         nfsquad_t cookie, ncookie;
2915         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2916         struct nfsnode *dnp = VTONFS(vp);
2917         struct nfsvattr nfsva;
2918         struct nfsrv_descript nfsd, *nd = &nfsd;
2919         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2920         int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
2921         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
2922         char *cp;
2923         nfsattrbit_t attrbits, dattrbits;
2924         u_int32_t rderr, *tl2 = NULL;
2925         size_t tresid;
2926
2927         KASSERT(uiop->uio_iovcnt == 1 &&
2928             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
2929             ("nfs readdirrpc bad uio"));
2930         ncookie.lval[0] = ncookie.lval[1] = 0;
2931         /*
2932          * There is no point in reading a lot more than uio_resid, however
2933          * adding one additional DIRBLKSIZ makes sense. Since uio_resid
2934          * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
2935          * will never make readsize > nm_readdirsize.
2936          */
2937         readsize = nmp->nm_readdirsize;
2938         if (readsize > uiop->uio_resid)
2939                 readsize = uiop->uio_resid + DIRBLKSIZ;
2940
2941         *attrflagp = 0;
2942         if (eofp)
2943                 *eofp = 0;
             /* Remember the starting resid to tell later if anything was copied. */
2944         tresid = uiop->uio_resid;
2945         cookie.lval[0] = cookiep->nfsuquad[0];
2946         cookie.lval[1] = cookiep->nfsuquad[1];
             /* No reply mbufs are held yet, so nfsmout has nothing to free. */
2947         nd->nd_mrep = NULL;
2948
2949         /*
2950          * For NFSv4, first create the "." and ".." entries.
2951          */
2952         if (NFSHASNFSV4(nmp)) {
                     /* Cookie (2 words), cookie verifier (2) and two counts. */
2953                 reqsize = 6 * NFSX_UNSIGNED;
2954                 NFSGETATTR_ATTRBIT(&dattrbits);
2955                 NFSZERO_ATTRBIT(&attrbits);
2956                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
2957                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
2958                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
2959                     NFSATTRBIT_MOUNTEDONFILEID)) {
2960                         NFSSETBIT_ATTRBIT(&attrbits,
2961                             NFSATTRBIT_MOUNTEDONFILEID);
2962                         gotmnton = 1;
2963                 } else {
2964                         /*
2965                          * Must fake it. Use the fileno, except when the
2966                          * fsid is != to that of the directory. For that
2967                          * case, generate a fake fileno that is not the same.
2968                          */
2969                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
2970                         gotmnton = 0;
2971                 }
2972
2973                 /*
2974                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
2975                  */
2976                 if (uiop->uio_offset == 0) {
2977                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
2978                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2979                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2980                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2981                         (void) nfsrv_putattrbit(nd, &attrbits);
2982                         error = nfscl_request(nd, vp, p, cred, stuff);
2983                         if (error)
2984                             return (error);
2985                         dotfileid = 0;  /* Fake out the compiler. */
2986                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
2987                             error = nfsm_loadattr(nd, &nfsva);
2988                             if (error != 0)
2989                                 goto nfsmout;
2990                             dotfileid = nfsva.na_fileid;
2991                         }
2992                         if (nd->nd_repstat == 0) {
2993                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
                                 /*
                                  * The 5th word is a length; step over that
                                  * many bytes (presumably the GETFH file
                                  * handle, which is not used here - confirm).
                                  */
2994                             len = fxdr_unsigned(int, *(tl + 4));
2995                             if (len > 0 && len <= NFSX_V4FHMAX)
2996                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
2997                             else
2998                                 error = EPERM;
2999                             if (!error) {
3000                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3001                                 nfsva.na_mntonfileno = UINT64_MAX;
3002                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3003                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3004                                     NULL, NULL, NULL, p, cred);
3005                                 if (error) {
                                         /*
                                          * NOTE(review): error stays set, so
                                          * the function returns it below and
                                          * this value ends up unused.
                                          */
3006                                     dotdotfileid = dotfileid;
3007                                 } else if (gotmnton) {
3008                                     if (nfsva.na_mntonfileno != UINT64_MAX)
3009                                         dotdotfileid = nfsva.na_mntonfileno;
3010                                     else
3011                                         dotdotfileid = nfsva.na_fileid;
3012                                 } else if (nfsva.na_filesid[0] ==
3013                                     dnp->n_vattr.na_filesid[0] &&
3014                                     nfsva.na_filesid[1] ==
3015                                     dnp->n_vattr.na_filesid[1]) {
3016                                     dotdotfileid = nfsva.na_fileid;
3017                                 } else {
3018                                     do {
3019                                         fakefileno--;
3020                                     } while (fakefileno ==
3021                                         nfsva.na_fileid);
3022                                     dotdotfileid = fakefileno;
3023                                 }
3024                             }
3025                         } else if (nd->nd_repstat == NFSERR_NOENT) {
3026                             /*
3027                              * Lookupp returns NFSERR_NOENT when we are
3028                              * at the root, so just use the current dir.
3029                              */
3030                             nd->nd_repstat = 0;
3031                             dotdotfileid = dotfileid;
3032                         } else {
3033                             error = nd->nd_repstat;
3034                         }
3035                         m_freem(nd->nd_mrep);
3036                         if (error)
3037                             return (error);
3038                         nd->nd_mrep = NULL;
                             /* Build the "." record directly in the caller's buffer. */
3039                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3040                         dp->d_pad0 = dp->d_pad1 = 0;
3041                         dp->d_off = 0;
3042                         dp->d_type = DT_DIR;
3043                         dp->d_fileno = dotfileid;
3044                         dp->d_namlen = 1;
3045                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
3046                         dp->d_name[0] = '.';
3047                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3048                         /*
3049                          * Just make these offset cookie 0.
3050                          */
3051                         tl = (u_int32_t *)&dp->d_name[8];
3052                         *tl++ = 0;
3053                         *tl = 0;
3054                         blksiz += dp->d_reclen;
3055                         uiop->uio_resid -= dp->d_reclen;
3056                         uiop->uio_offset += dp->d_reclen;
3057                         uiop->uio_iov->iov_base =
3058                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3059                         uiop->uio_iov->iov_len -= dp->d_reclen;
                             /* Then the ".." record, right after it. */
3060                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3061                         dp->d_pad0 = dp->d_pad1 = 0;
3062                         dp->d_off = 0;
3063                         dp->d_type = DT_DIR;
3064                         dp->d_fileno = dotdotfileid;
3065                         dp->d_namlen = 2;
3066                         *((uint64_t *)dp->d_name) = 0;
3067                         dp->d_name[0] = '.';
3068                         dp->d_name[1] = '.';
3069                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3070                         /*
3071                          * Just make these offset cookie 0.
3072                          */
3073                         tl = (u_int32_t *)&dp->d_name[8];
3074                         *tl++ = 0;
3075                         *tl = 0;
3076                         blksiz += dp->d_reclen;
3077                         uiop->uio_resid -= dp->d_reclen;
3078                         uiop->uio_offset += dp->d_reclen;
3079                         uiop->uio_iov->iov_base =
3080                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3081                         uiop->uio_iov->iov_len -= dp->d_reclen;
3082                 }
3083                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
3084         } else {
                     /* Cookie (2 words), cookie verifier (2) and one count. */
3085                 reqsize = 5 * NFSX_UNSIGNED;
3086         }
3087
3088         /*
3089          * Loop around doing readdir rpc's of size readsize.
3090          * The stopping criteria is EOF or buffer full.
3091          */
3092         while (more_dirs && bigenough) {
3093                 *attrflagp = 0;
3094                 NFSCL_REQSTART(nd, NFSPROC_READDIR, vp);
3095                 if (nd->nd_flag & ND_NFSV2) {
3096                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3097                         *tl++ = cookie.lval[1];
3098                         *tl = txdr_unsigned(readsize);
3099                 } else {
3100                         NFSM_BUILD(tl, u_int32_t *, reqsize);
3101                         *tl++ = cookie.lval[0];
3102                         *tl++ = cookie.lval[1];
                             /* A zero cookie is sent with a zero cookie verifier. */
3103                         if (cookie.qval == 0) {
3104                                 *tl++ = 0;
3105                                 *tl++ = 0;
3106                         } else {
3107                                 NFSLOCKNODE(dnp);
3108                                 *tl++ = dnp->n_cookieverf.nfsuquad[0];
3109                                 *tl++ = dnp->n_cookieverf.nfsuquad[1];
3110                                 NFSUNLOCKNODE(dnp);
3111                         }
3112                         if (nd->nd_flag & ND_NFSV4) {
3113                                 *tl++ = txdr_unsigned(readsize);
3114                                 *tl = txdr_unsigned(readsize);
3115                                 (void) nfsrv_putattrbit(nd, &attrbits);
3116                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3117                                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3118                                 (void) nfsrv_putattrbit(nd, &dattrbits);
3119                         } else {
3120                                 *tl = txdr_unsigned(readsize);
3121                         }
3122                 }
3123                 error = nfscl_request(nd, vp, p, cred, stuff);
3124                 if (error)
3125                         return (error);
3126                 if (!(nd->nd_flag & ND_NFSV2)) {
3127                         if (nd->nd_flag & ND_NFSV3)
3128                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3129                                     stuff);
3130                         if (!nd->nd_repstat && !error) {
                                     /* Save the cookie verifier from the reply. */
3131                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
3132                                 NFSLOCKNODE(dnp);
3133                                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3134                                 dnp->n_cookieverf.nfsuquad[1] = *tl;
3135                                 NFSUNLOCKNODE(dnp);
3136                         }
3137                 }
3138                 if (nd->nd_repstat || error) {
3139                         if (!error)
3140                                 error = nd->nd_repstat;
3141                         goto nfsmout;
3142                 }
3143                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3144                 more_dirs = fxdr_unsigned(int, *tl);
                     /* A reply with no entries: do not go around for another RPC. */
3145                 if (!more_dirs)
3146                         tryformoredirs = 0;
3147
3148                 /* loop through the dir entries, doctoring them to 4bsd form */
3149                 while (more_dirs && bigenough) {
3150                         if (nd->nd_flag & ND_NFSV4) {
3151                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3152                                 ncookie.lval[0] = *tl++;
3153                                 ncookie.lval[1] = *tl++;
3154                                 len = fxdr_unsigned(int, *tl);
3155                         } else if (nd->nd_flag & ND_NFSV3) {
3156                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3157                                 nfsva.na_fileid = fxdr_hyper(tl);
3158                                 tl += 2;
3159                                 len = fxdr_unsigned(int, *tl);
3160                         } else {
3161                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3162                                 nfsva.na_fileid = fxdr_unsigned(uint64_t,
3163                                     *tl++);
3164                                 len = fxdr_unsigned(int, *tl);
3165                         }
3166                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3167                                 error = EBADRPC;
3168                                 goto nfsmout;
3169                         }
3170                         tlen = roundup2(len, 8);
3171                         if (tlen == len)
3172                                 tlen += 8;  /* To ensure null termination. */
3173                         left = DIRBLKSIZ - blksiz;
                             /*
                              * The entry will not fit in what is left of the
                              * current DIRBLKSIZ block: zero the remainder,
                              * fold it into the previous record's d_reclen
                              * and start a new block.
                              */
3174                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3175                                 NFSBZERO(uiop->uio_iov->iov_base, left);
3176                                 dp->d_reclen += left;
3177                                 uiop->uio_iov->iov_base =
3178                                     (char *)uiop->uio_iov->iov_base + left;
3179                                 uiop->uio_iov->iov_len -= left;
3180                                 uiop->uio_resid -= left;
3181                                 uiop->uio_offset += left;
3182                                 blksiz = 0;
3183                         }
                             /*
                              * No room left in the caller's buffer for this
                              * entry: it is still parsed below, but not
                              * copied, and both loops then terminate.
                              */
3184                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3185                             uiop->uio_resid)
3186                                 bigenough = 0;
3187                         if (bigenough) {
3188                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3189                                 dp->d_pad0 = dp->d_pad1 = 0;
3190                                 dp->d_off = 0;
3191                                 dp->d_namlen = len;
3192                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
3193                                     NFSX_HYPER;
3194                                 dp->d_type = DT_UNKNOWN;
3195                                 blksiz += dp->d_reclen;
3196                                 if (blksiz == DIRBLKSIZ)
3197                                         blksiz = 0;
3198                                 uiop->uio_resid -= DIRHDSIZ;
3199                                 uiop->uio_offset += DIRHDSIZ;
3200                                 uiop->uio_iov->iov_base =
3201                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3202                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
                                     /* Copy the name from the reply into the buffer. */
3203                                 error = nfsm_mbufuio(nd, uiop, len);
3204                                 if (error)
3205                                         goto nfsmout;
3206                                 cp = uiop->uio_iov->iov_base;
3207                                 tlen -= len;
3208                                 NFSBZERO(cp, tlen);
3209                                 cp += tlen;     /* points to cookie storage */
3210                                 tl2 = (u_int32_t *)cp;
3211                                 uiop->uio_iov->iov_base =
3212                                     (char *)uiop->uio_iov->iov_base + tlen +
3213                                     NFSX_HYPER;
3214                                 uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3215                                 uiop->uio_resid -= tlen + NFSX_HYPER;
3216                                 uiop->uio_offset += (tlen + NFSX_HYPER);
3217                         } else {
                                     /* Not copying this entry, so skip over its name. */
3218                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3219                                 if (error)
3220                                         goto nfsmout;
3221                         }
3222                         if (nd->nd_flag & ND_NFSV4) {
3223                                 rderr = 0;
3224                                 nfsva.na_mntonfileno = UINT64_MAX;
3225                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3226                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3227                                     NULL, NULL, &rderr, p, cred);
3228                                 if (error)
3229                                         goto nfsmout;
3230                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3231                         } else if (nd->nd_flag & ND_NFSV3) {
3232                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3233                                 ncookie.lval[0] = *tl++;
3234                                 ncookie.lval[1] = *tl++;
3235                         } else {
3236                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3237                                 ncookie.lval[0] = 0;
3238                                 ncookie.lval[1] = *tl++;
3239                         }
3240                         if (bigenough) {
3241                             if (nd->nd_flag & ND_NFSV4) {
3242                                 if (rderr) {
3243                                     dp->d_fileno = 0;
3244                                 } else {
3245                                     if (gotmnton) {
3246                                         if (nfsva.na_mntonfileno != UINT64_MAX)
3247                                             dp->d_fileno = nfsva.na_mntonfileno;
3248                                         else
3249                                             dp->d_fileno = nfsva.na_fileid;
3250                                     } else if (nfsva.na_filesid[0] ==
3251                                         dnp->n_vattr.na_filesid[0] &&
3252                                         nfsva.na_filesid[1] ==
3253                                         dnp->n_vattr.na_filesid[1]) {
3254                                         dp->d_fileno = nfsva.na_fileid;
3255                                     } else {
3256                                         do {
3257                                             fakefileno--;
3258                                         } while (fakefileno ==
3259                                             nfsva.na_fileid);
3260                                         dp->d_fileno = fakefileno;
3261                                     }
3262                                     dp->d_type = vtonfs_dtype(nfsva.na_type);
3263                                 }
3264                             } else {
3265                                 dp->d_fileno = nfsva.na_fileid;
3266                             }
                                 /*
                                  * Store the entry's cookie after its name and
                                  * make it the cookie used by the next request.
                                  */
3267                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3268                                 ncookie.lval[0];
3269                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3270                                 ncookie.lval[1];
3271                         }
                             /* tl now points at the "another entry follows" word. */
3272                         more_dirs = fxdr_unsigned(int, *tl);
3273                 }
3274                 /*
3275                  * If at end of rpc data, get the eof boolean
3276                  */
3277                 if (!more_dirs) {
3278                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3279                         eof = fxdr_unsigned(int, *tl);
                             /*
                              * Go around for another RPC only if this reply
                              * held at least one entry and was not at eof.
                              */
3280                         if (tryformoredirs)
3281                                 more_dirs = !eof;
3282                         if (nd->nd_flag & ND_NFSV4) {
3283                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3284                                     stuff);
3285                                 if (error)
3286                                         goto nfsmout;
3287                         }
3288                 }
3289                 m_freem(nd->nd_mrep);
3290                 nd->nd_mrep = NULL;
3291         }
3292         /*
3293          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3294          * by increasing d_reclen for the last record.
3295          */
3296         if (blksiz > 0) {
3297                 left = DIRBLKSIZ - blksiz;
3298                 NFSBZERO(uiop->uio_iov->iov_base, left);
3299                 dp->d_reclen += left;
3300                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3301                     left;
3302                 uiop->uio_iov->iov_len -= left;
3303                 uiop->uio_resid -= left;
3304                 uiop->uio_offset += left;
3305         }
3306
3307         /*
3308          * If returning no data, assume end of file.
3309          * If not bigenough, return not end of file, since you aren't
3310          *    returning all the data
3311          * Otherwise, return the eof flag from the server.
3312          */
3313         if (eofp) {
3314                 if (tresid == ((size_t)(uiop->uio_resid)))
3315                         *eofp = 1;
3316                 else if (!bigenough)
3317                         *eofp = 0;
3318                 else
3319                         *eofp = eof;
3320         }
3321
3322         /*
3323          * Add extra empty records to any remaining DIRBLKSIZ chunks.
              * Each filler record spans a whole DIRBLKSIZ block and carries
              * the current cookie; nothing is added if no data was returned.
3324          */
3325         while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
3326                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3327                 NFSBZERO(dp, DIRBLKSIZ);
3328                 dp->d_type = DT_UNKNOWN;
3329                 tl = (u_int32_t *)&dp->d_name[4];
3330                 *tl++ = cookie.lval[0];
3331                 *tl = cookie.lval[1];
3332                 dp->d_reclen = DIRBLKSIZ;
3333                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3334                     DIRBLKSIZ;
3335                 uiop->uio_iov->iov_len -= DIRBLKSIZ;
3336                 uiop->uio_resid -= DIRBLKSIZ;
3337                 uiop->uio_offset += DIRBLKSIZ;
3338         }
3339
3340 nfsmout:
             /* Free the reply, if one is still held, on both success and error. */
3341         if (nd->nd_mrep != NULL)
3342                 m_freem(nd->nd_mrep);
3343         return (error);
3344 }
3345
3346 #ifndef APPLE
3347 /*
3348  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3349  * (Also used for NFS V4 when mount flag set.)
3350  * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
3351  */
3352 int
3353 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3354     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3355     int *eofp, void *stuff)
3356 {
3357         int len, left;
3358         struct dirent *dp = NULL;
3359         u_int32_t *tl;
3360         vnode_t newvp = NULLVP;
3361         struct nfsrv_descript nfsd, *nd = &nfsd;
3362         struct nameidata nami, *ndp = &nami;
3363         struct componentname *cnp = &ndp->ni_cnd;
3364         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3365         struct nfsnode *dnp = VTONFS(vp), *np;
3366         struct nfsvattr nfsva;
3367         struct nfsfh *nfhp;
3368         nfsquad_t cookie, ncookie;
3369         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3370         int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3371         int isdotdot = 0, unlocknewvp = 0;
3372         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3373         u_int64_t fileno = 0;
3374         char *cp;
3375         nfsattrbit_t attrbits, dattrbits;
3376         size_t tresid;
3377         u_int32_t *tl2 = NULL, rderr;
3378         struct timespec dctime;
3379
3380         KASSERT(uiop->uio_iovcnt == 1 &&
3381             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3382             ("nfs readdirplusrpc bad uio"));
3383         ncookie.lval[0] = ncookie.lval[1] = 0;
3384         timespecclear(&dctime);
3385         *attrflagp = 0;
3386         if (eofp != NULL)
3387                 *eofp = 0;
3388         ndp->ni_dvp = vp;
3389         nd->nd_mrep = NULL;
3390         cookie.lval[0] = cookiep->nfsuquad[0];
3391         cookie.lval[1] = cookiep->nfsuquad[1];
3392         tresid = uiop->uio_resid;
3393
3394         /*
3395          * For NFSv4, first create the "." and ".." entries.
3396          */
3397         if (NFSHASNFSV4(nmp)) {
3398                 NFSGETATTR_ATTRBIT(&dattrbits);
3399                 NFSZERO_ATTRBIT(&attrbits);
3400                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3401                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3402                     NFSATTRBIT_MOUNTEDONFILEID)) {
3403                         NFSSETBIT_ATTRBIT(&attrbits,
3404                             NFSATTRBIT_MOUNTEDONFILEID);
3405                         gotmnton = 1;
3406                 } else {
3407                         /*
3408                          * Must fake it. Use the fileno, except when the
3409                          * fsid is != to that of the directory. For that
3410                          * case, generate a fake fileno that is not the same.
3411                          */
3412                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3413                         gotmnton = 0;
3414                 }
3415
3416                 /*
3417                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3418                  */
3419                 if (uiop->uio_offset == 0) {
3420                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
3421                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3422                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
3423                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3424                         (void) nfsrv_putattrbit(nd, &attrbits);
3425                         error = nfscl_request(nd, vp, p, cred, stuff);
3426                         if (error)
3427                             return (error);
3428                         dotfileid = 0;  /* Fake out the compiler. */
3429                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3430                             error = nfsm_loadattr(nd, &nfsva);
3431                             if (error != 0)
3432                                 goto nfsmout;
3433                             dctime = nfsva.na_ctime;
3434                             dotfileid = nfsva.na_fileid;
3435                         }
3436                         if (nd->nd_repstat == 0) {
3437                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3438                             len = fxdr_unsigned(int, *(tl + 4));
3439                             if (len > 0 && len <= NFSX_V4FHMAX)
3440                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3441                             else
3442                                 error = EPERM;
3443                             if (!error) {
3444                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3445                                 nfsva.na_mntonfileno = UINT64_MAX;
3446                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3447                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3448                                     NULL, NULL, NULL, p, cred);
3449                                 if (error) {
3450                                     dotdotfileid = dotfileid;
3451                                 } else if (gotmnton) {
3452                                     if (nfsva.na_mntonfileno != UINT64_MAX)
3453                                         dotdotfileid = nfsva.na_mntonfileno;
3454                                     else
3455                                         dotdotfileid = nfsva.na_fileid;
3456                                 } else if (nfsva.na_filesid[0] ==
3457                                     dnp->n_vattr.na_filesid[0] &&
3458                                     nfsva.na_filesid[1] ==
3459                                     dnp->n_vattr.na_filesid[1]) {
3460                                     dotdotfileid = nfsva.na_fileid;
3461                                 } else {
3462                                     do {
3463                                         fakefileno--;
3464                                     } while (fakefileno ==
3465                                         nfsva.na_fileid);
3466                                     dotdotfileid = fakefileno;
3467                                 }
3468                             }
3469                         } else if (nd->nd_repstat == NFSERR_NOENT) {
3470                             /*
3471                              * Lookupp returns NFSERR_NOENT when we are
3472                              * at the root, so just use the current dir.
3473                              */
3474                             nd->nd_repstat = 0;
3475                             dotdotfileid = dotfileid;
3476                         } else {
3477                             error = nd->nd_repstat;
3478                         }
3479                         m_freem(nd->nd_mrep);
3480                         if (error)
3481                             return (error);
3482                         nd->nd_mrep = NULL;
3483                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3484                         dp->d_pad0 = dp->d_pad1 = 0;
3485                         dp->d_off = 0;
3486                         dp->d_type = DT_DIR;
3487                         dp->d_fileno = dotfileid;
3488                         dp->d_namlen = 1;
3489                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
3490                         dp->d_name[0] = '.';
3491                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3492                         /*
3493                          * Just make these offset cookie 0.
3494                          */
3495                         tl = (u_int32_t *)&dp->d_name[8];
3496                         *tl++ = 0;
3497                         *tl = 0;
3498                         blksiz += dp->d_reclen;
3499                         uiop->uio_resid -= dp->d_reclen;
3500                         uiop->uio_offset += dp->d_reclen;
3501                         uiop->uio_iov->iov_base =
3502                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3503                         uiop->uio_iov->iov_len -= dp->d_reclen;
3504                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3505                         dp->d_pad0 = dp->d_pad1 = 0;
3506                         dp->d_off = 0;
3507                         dp->d_type = DT_DIR;
3508                         dp->d_fileno = dotdotfileid;
3509                         dp->d_namlen = 2;
3510                         *((uint64_t *)dp->d_name) = 0;
3511                         dp->d_name[0] = '.';
3512                         dp->d_name[1] = '.';
3513                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3514                         /*
3515                          * Just make these offset cookie 0.
3516                          */
3517                         tl = (u_int32_t *)&dp->d_name[8];
3518                         *tl++ = 0;
3519                         *tl = 0;
3520                         blksiz += dp->d_reclen;
3521                         uiop->uio_resid -= dp->d_reclen;
3522                         uiop->uio_offset += dp->d_reclen;
3523                         uiop->uio_iov->iov_base =
3524                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3525                         uiop->uio_iov->iov_len -= dp->d_reclen;
3526                 }
3527                 NFSREADDIRPLUS_ATTRBIT(&attrbits);
3528                 if (gotmnton)
3529                         NFSSETBIT_ATTRBIT(&attrbits,
3530                             NFSATTRBIT_MOUNTEDONFILEID);
3531         }
3532
3533         /*
3534          * Loop around doing readdir rpc's of size nm_readdirsize.
3535          * The stopping criteria is EOF or buffer full.
3536          */
3537         while (more_dirs && bigenough) {
3538                 *attrflagp = 0;
3539                 NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp);
3540                 NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3541                 *tl++ = cookie.lval[0];
3542                 *tl++ = cookie.lval[1];
3543                 if (cookie.qval == 0) {
3544                         *tl++ = 0;
3545                         *tl++ = 0;
3546                 } else {
3547                         NFSLOCKNODE(dnp);
3548                         *tl++ = dnp->n_cookieverf.nfsuquad[0];
3549                         *tl++ = dnp->n_cookieverf.nfsuquad[1];
3550                         NFSUNLOCKNODE(dnp);
3551                 }
3552                 *tl++ = txdr_unsigned(nmp->nm_readdirsize);
3553                 *tl = txdr_unsigned(nmp->nm_readdirsize);
3554                 if (nd->nd_flag & ND_NFSV4) {
3555                         (void) nfsrv_putattrbit(nd, &attrbits);
3556                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3557                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3558                         (void) nfsrv_putattrbit(nd, &dattrbits);
3559                 }
3560                 error = nfscl_request(nd, vp, p, cred, stuff);
3561                 if (error)
3562                         return (error);
3563                 if (nd->nd_flag & ND_NFSV3)
3564                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3565                 if (nd->nd_repstat || error) {
3566                         if (!error)
3567                                 error = nd->nd_repstat;
3568                         goto nfsmout;
3569                 }
3570                 if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3571                         dctime = nap->na_ctime;
3572                 NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3573                 NFSLOCKNODE(dnp);
3574                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3575                 dnp->n_cookieverf.nfsuquad[1] = *tl++;
3576                 NFSUNLOCKNODE(dnp);
3577                 more_dirs = fxdr_unsigned(int, *tl);
3578                 if (!more_dirs)
3579                         tryformoredirs = 0;
3580
3581                 /* loop through the dir entries, doctoring them to 4bsd form */
3582                 while (more_dirs && bigenough) {
3583                         NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3584                         if (nd->nd_flag & ND_NFSV4) {
3585                                 ncookie.lval[0] = *tl++;
3586                                 ncookie.lval[1] = *tl++;
3587                         } else {
3588                                 fileno = fxdr_hyper(tl);
3589                                 tl += 2;
3590                         }
3591                         len = fxdr_unsigned(int, *tl);
3592                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3593                                 error = EBADRPC;
3594                                 goto nfsmout;
3595                         }
3596                         tlen = roundup2(len, 8);
3597                         if (tlen == len)
3598                                 tlen += 8;  /* To ensure null termination. */
3599                         left = DIRBLKSIZ - blksiz;
3600                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3601                                 NFSBZERO(uiop->uio_iov->iov_base, left);
3602                                 dp->d_reclen += left;
3603                                 uiop->uio_iov->iov_base =
3604                                     (char *)uiop->uio_iov->iov_base + left;
3605                                 uiop->uio_iov->iov_len -= left;
3606                                 uiop->uio_resid -= left;
3607                                 uiop->uio_offset += left;
3608                                 blksiz = 0;
3609                         }
3610                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3611                             uiop->uio_resid)
3612                                 bigenough = 0;
3613                         if (bigenough) {
3614                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3615                                 dp->d_pad0 = dp->d_pad1 = 0;
3616                                 dp->d_off = 0;
3617                                 dp->d_namlen = len;
3618                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
3619                                     NFSX_HYPER;
3620                                 dp->d_type = DT_UNKNOWN;
3621                                 blksiz += dp->d_reclen;
3622                                 if (blksiz == DIRBLKSIZ)
3623                                         blksiz = 0;
3624                                 uiop->uio_resid -= DIRHDSIZ;
3625                                 uiop->uio_offset += DIRHDSIZ;
3626                                 uiop->uio_iov->iov_base =
3627                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3628                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
3629                                 cnp->cn_nameptr = uiop->uio_iov->iov_base;
3630                                 cnp->cn_namelen = len;
3631                                 NFSCNHASHZERO(cnp);
3632                                 error = nfsm_mbufuio(nd, uiop, len);
3633                                 if (error)
3634                                         goto nfsmout;
3635                                 cp = uiop->uio_iov->iov_base;
3636                                 tlen -= len;
3637                                 NFSBZERO(cp, tlen);
3638                                 cp += tlen;     /* points to cookie storage */
3639                                 tl2 = (u_int32_t *)cp;
3640                                 if (len == 2 && cnp->cn_nameptr[0] == '.' &&
3641                                     cnp->cn_nameptr[1] == '.')
3642                                         isdotdot = 1;
3643                                 else
3644                                         isdotdot = 0;
3645                                 uiop->uio_iov->iov_base =
3646                                     (char *)uiop->uio_iov->iov_base + tlen +
3647                                     NFSX_HYPER;
3648                                 uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3649                                 uiop->uio_resid -= tlen + NFSX_HYPER;
3650                                 uiop->uio_offset += (tlen + NFSX_HYPER);
3651                         } else {
3652                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3653                                 if (error)
3654                                         goto nfsmout;
3655                         }
3656                         nfhp = NULL;
3657                         if (nd->nd_flag & ND_NFSV3) {
3658                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3659                                 ncookie.lval[0] = *tl++;
3660                                 ncookie.lval[1] = *tl++;
3661                                 attrflag = fxdr_unsigned(int, *tl);
3662                                 if (attrflag) {
3663                                   error = nfsm_loadattr(nd, &nfsva);
3664                                   if (error)
3665                                         goto nfsmout;
3666                                 }
3667                                 NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
3668                                 if (*tl) {
3669                                         error = nfsm_getfh(nd, &nfhp);
3670                                         if (error)
3671                                             goto nfsmout;
3672                                 }
3673                                 if (!attrflag && nfhp != NULL) {
3674                                         free(nfhp, M_NFSFH);
3675                                         nfhp = NULL;
3676                                 }
3677                         } else {
3678                                 rderr = 0;
3679                                 nfsva.na_mntonfileno = 0xffffffff;
3680                                 error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
3681                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3682                                     NULL, NULL, &rderr, p, cred);
3683                                 if (error)
3684                                         goto nfsmout;
3685                         }
3686
3687                         if (bigenough) {
3688                             if (nd->nd_flag & ND_NFSV4) {
3689                                 if (rderr) {
3690                                     dp->d_fileno = 0;
3691                                 } else if (gotmnton) {
3692                                     if (nfsva.na_mntonfileno != 0xffffffff)
3693                                         dp->d_fileno = nfsva.na_mntonfileno;
3694                                     else
3695                                         dp->d_fileno = nfsva.na_fileid;
3696                                 } else if (nfsva.na_filesid[0] ==
3697                                     dnp->n_vattr.na_filesid[0] &&
3698                                     nfsva.na_filesid[1] ==
3699                                     dnp->n_vattr.na_filesid[1]) {
3700                                     dp->d_fileno = nfsva.na_fileid;
3701                                 } else {
3702                                     do {
3703                                         fakefileno--;
3704                                     } while (fakefileno ==
3705                                         nfsva.na_fileid);
3706                                     dp->d_fileno = fakefileno;
3707                                 }
3708                             } else {
3709                                 dp->d_fileno = fileno;
3710                             }
3711                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3712                                 ncookie.lval[0];
3713                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3714                                 ncookie.lval[1];
3715
3716                             if (nfhp != NULL) {
3717                                 if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
3718                                     dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
3719                                     VREF(vp);
3720                                     newvp = vp;
3721                                     unlocknewvp = 0;
3722                                     free(nfhp, M_NFSFH);
3723                                     np = dnp;
3724                                 } else if (isdotdot != 0) {
3725                                     /*
3726                                      * Skip doing a nfscl_nget() call for "..".
3727                                      * There's a race between acquiring the nfs
3728                                      * node here and lookups that look for the
3729                                      * directory being read (in the parent).
3730                                      * It would try to get a lock on ".." here,
3731                                      * owning the lock on the directory being
3732                                      * read. Lookup will hold the lock on ".."
3733                                      * and try to acquire the lock on the
3734                                      * directory being read.
3735                                      * If the directory is unlocked/relocked,
3736                                      * then there is a LOR with the buflock
3737                                      * vp is relocked.
3738                                      */
3739                                     free(nfhp, M_NFSFH);
3740                                 } else {
3741                                     error = nfscl_nget(vp->v_mount, vp,
3742                                       nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE);
3743                                     if (!error) {
3744                                         newvp = NFSTOV(np);
3745                                         unlocknewvp = 1;
3746                                     }
3747                                 }
3748                                 nfhp = NULL;
3749                                 if (newvp != NULLVP) {
3750                                     error = nfscl_loadattrcache(&newvp,
3751                                         &nfsva, NULL, NULL, 0, 0);
3752                                     if (error) {
3753                                         if (unlocknewvp)
3754                                             vput(newvp);
3755                                         else
3756                                             vrele(newvp);
3757                                         goto nfsmout;
3758                                     }
3759                                     dp->d_type =
3760                                         vtonfs_dtype(np->n_vattr.na_type);
3761                                     ndp->ni_vp = newvp;
3762                                     NFSCNHASH(cnp, HASHINIT);
3763                                     if (cnp->cn_namelen <= NCHNAMLEN &&
3764                                         (newvp->v_type != VDIR ||
3765                                          dctime.tv_sec != 0)) {
3766                                         cache_enter_time(ndp->ni_dvp,
3767                                             ndp->ni_vp, cnp,
3768                                             &nfsva.na_ctime,
3769                                             newvp->v_type != VDIR ? NULL :
3770                                             &dctime);
3771                                     }
3772                                     if (unlocknewvp)
3773                                         vput(newvp);
3774                                     else
3775                                         vrele(newvp);
3776                                     newvp = NULLVP;
3777                                 }
3778                             }
3779                         } else if (nfhp != NULL) {
3780                             free(nfhp, M_NFSFH);
3781                         }
3782                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3783                         more_dirs = fxdr_unsigned(int, *tl);
3784                 }
3785                 /*
3786                  * If at end of rpc data, get the eof boolean
3787                  */
3788                 if (!more_dirs) {
3789                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3790                         eof = fxdr_unsigned(int, *tl);
3791                         if (tryformoredirs)
3792                                 more_dirs = !eof;
3793                         if (nd->nd_flag & ND_NFSV4) {
3794                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3795                                     stuff);
3796                                 if (error)
3797                                         goto nfsmout;
3798                         }
3799                 }
3800                 m_freem(nd->nd_mrep);
3801                 nd->nd_mrep = NULL;
3802         }
3803         /*
3804          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3805          * by increasing d_reclen for the last record.
3806          */
3807         if (blksiz > 0) {
3808                 left = DIRBLKSIZ - blksiz;
3809                 NFSBZERO(uiop->uio_iov->iov_base, left);
3810                 dp->d_reclen += left;
3811                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3812                     left;
3813                 uiop->uio_iov->iov_len -= left;
3814                 uiop->uio_resid -= left;
3815                 uiop->uio_offset += left;
3816         }
3817
3818         /*
3819          * If returning no data, assume end of file.
3820          * If not bigenough, return not end of file, since you aren't
3821          *    returning all the data
3822          * Otherwise, return the eof flag from the server.
3823          */
3824         if (eofp != NULL) {
3825                 if (tresid == uiop->uio_resid)
3826                         *eofp = 1;
3827                 else if (!bigenough)
3828                         *eofp = 0;
3829                 else
3830                         *eofp = eof;
3831         }
3832
3833         /*
3834          * Add extra empty records to any remaining DIRBLKSIZ chunks.
3835          */
3836         while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
3837                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3838                 NFSBZERO(dp, DIRBLKSIZ);
3839                 dp->d_type = DT_UNKNOWN;
3840                 tl = (u_int32_t *)&dp->d_name[4];
3841                 *tl++ = cookie.lval[0];
3842                 *tl = cookie.lval[1];
3843                 dp->d_reclen = DIRBLKSIZ;
3844                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3845                     DIRBLKSIZ;
3846                 uiop->uio_iov->iov_len -= DIRBLKSIZ;
3847                 uiop->uio_resid -= DIRBLKSIZ;
3848                 uiop->uio_offset += DIRBLKSIZ;
3849         }
3850
3851 nfsmout:
3852         if (nd->nd_mrep != NULL)
3853                 m_freem(nd->nd_mrep);
3854         return (error);
3855 }
3856 #endif  /* !APPLE */
3857
3858 /*
3859  * Nfs commit rpc
3860  */
3861 int
3862 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
3863     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
3864 {
3865         u_int32_t *tl;
3866         struct nfsrv_descript nfsd, *nd = &nfsd;
3867         nfsattrbit_t attrbits;
3868         int error;
3869         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3870
3871         *attrflagp = 0;
3872         NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp);
3873         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3874         txdr_hyper(offset, tl);
3875         tl += 2;
3876         *tl = txdr_unsigned(cnt);
3877         if (nd->nd_flag & ND_NFSV4) {
3878                 /*
3879                  * And do a Getattr op.
3880                  */
3881                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3882                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3883                 NFSGETATTR_ATTRBIT(&attrbits);
3884                 (void) nfsrv_putattrbit(nd, &attrbits);
3885         }
3886         error = nfscl_request(nd, vp, p, cred, stuff);
3887         if (error)
3888                 return (error);
3889         error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff);
3890         if (!error && !nd->nd_repstat) {
3891                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
3892                 NFSLOCKMNT(nmp);
3893                 if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
3894                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
3895                         nd->nd_repstat = NFSERR_STALEWRITEVERF;
3896                 }
3897                 NFSUNLOCKMNT(nmp);
3898                 if (nd->nd_flag & ND_NFSV4)
3899                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3900         }
3901 nfsmout:
3902         if (!error && nd->nd_repstat)
3903                 error = nd->nd_repstat;
3904         m_freem(nd->nd_mrep);
3905         return (error);
3906 }
3907
3908 /*
3909  * NFS byte range lock rpc.
3910  * (Mostly just calls one of the three lower level RPC routines.)
3911  */
3912 int
3913 nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
3914     int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
3915 {
3916         struct nfscllockowner *lp;
3917         struct nfsclclient *clp;
3918         struct nfsfh *nfhp;
3919         struct nfsrv_descript nfsd, *nd = &nfsd;
3920         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3921         u_int64_t off, len;
3922         off_t start, end;
3923         u_int32_t clidrev = 0;
3924         int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
3925         int callcnt, dorpc;
3926
3927         /*
3928          * Convert the flock structure into a start and end and do POSIX
3929          * bounds checking.
3930          */
3931         switch (fl->l_whence) {
3932         case SEEK_SET:
3933         case SEEK_CUR:
3934                 /*
3935                  * Caller is responsible for adding any necessary offset
3936                  * when SEEK_CUR is used.
3937                  */
3938                 start = fl->l_start;
3939                 off = fl->l_start;
3940                 break;
3941         case SEEK_END:
3942                 start = size + fl->l_start;
3943                 off = size + fl->l_start;
3944                 break;
3945         default:
3946                 return (EINVAL);
3947         }
3948         if (start < 0)
3949                 return (EINVAL);
3950         if (fl->l_len != 0) {
3951                 end = start + fl->l_len - 1;
3952                 if (end < start)
3953                         return (EINVAL);
3954         }
3955
3956         len = fl->l_len;
3957         if (len == 0)
3958                 len = NFS64BITSSET;
3959         retrycnt = 0;
3960         do {
3961             nd->nd_repstat = 0;
3962             if (op == F_GETLK) {
3963                 error = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
3964                 if (error)
3965                         return (error);
3966                 error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
3967                 if (!error) {
3968                         clidrev = clp->nfsc_clientidrev;
3969                         error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
3970                             p, id, flags);
3971                 } else if (error == -1) {
3972                         error = 0;
3973                 }
3974                 nfscl_clientrelease(clp);
3975             } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
3976                 /*
3977                  * We must loop around for all lockowner cases.
3978                  */
3979                 callcnt = 0;
3980                 error = nfscl_getcl(vp->v_mount, cred, p, 1, &clp);
3981                 if (error)
3982                         return (error);
3983                 do {
3984                     error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
3985                         clp, id, flags, &lp, &dorpc);
3986                     /*
3987                      * If it returns a NULL lp, we're done.
3988                      */
3989                     if (lp == NULL) {
3990                         if (callcnt == 0)
3991                             nfscl_clientrelease(clp);
3992                         else
3993                             nfscl_releasealllocks(clp, vp, p, id, flags);
3994                         return (error);
3995                     }
3996                     if (nmp->nm_clp != NULL)
3997                         clidrev = nmp->nm_clp->nfsc_clientidrev;
3998                     else
3999                         clidrev = 0;
4000                     /*
4001                      * If the server doesn't support Posix lock semantics,
4002                      * only allow locks on the entire file, since it won't
4003                      * handle overlapping byte ranges.
4004                      * There might still be a problem when a lock
4005                      * upgrade/downgrade (read<->write) occurs, since the
4006                      * server "might" expect an unlock first?
4007                      */
4008                     if (dorpc && (lp->nfsl_open->nfso_posixlock ||
4009                         (off == 0 && len == NFS64BITSSET))) {
4010                         /*
4011                          * Since the lock records will go away, we must
4012                          * wait for grace and delay here.
4013                          */
4014                         do {
4015                             error = nfsrpc_locku(nd, nmp, lp, off, len,
4016                                 NFSV4LOCKT_READ, cred, p, 0);
4017                             if ((nd->nd_repstat == NFSERR_GRACE ||
4018                                  nd->nd_repstat == NFSERR_DELAY) &&
4019                                 error == 0)
4020                                 (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4021                                     "nfs_advlock");
4022                         } while ((nd->nd_repstat == NFSERR_GRACE ||
4023                             nd->nd_repstat == NFSERR_DELAY) && error == 0);
4024                     }
4025                     callcnt++;
4026                 } while (error == 0 && nd->nd_repstat == 0);
4027                 nfscl_releasealllocks(clp, vp, p, id, flags);
4028             } else if (op == F_SETLK) {
4029                 error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
4030                     NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
4031                 if (error || donelocally) {
4032                         return (error);
4033                 }
4034                 if (nmp->nm_clp != NULL)
4035                         clidrev = nmp->nm_clp->nfsc_clientidrev;
4036                 else
4037                         clidrev = 0;
4038                 nfhp = VTONFS(vp)->n_fhp;
4039                 if (!lp->nfsl_open->nfso_posixlock &&
4040                     (off != 0 || len != NFS64BITSSET)) {
4041                         error = EINVAL;
4042                 } else {
4043                         error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
4044                             nfhp->nfh_len, lp, newone, reclaim, off,
4045                             len, fl->l_type, cred, p, 0);
4046                 }
4047                 if (!error)
4048                         error = nd->nd_repstat;
4049                 nfscl_lockrelease(lp, error, newone);
4050             } else {
4051                 error = EINVAL;
4052             }
4053             if (!error)
4054                 error = nd->nd_repstat;
4055             if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
4056                 error == NFSERR_STALEDONTRECOVER ||
4057                 error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4058                 error == NFSERR_BADSESSION) {
4059                 (void) nfs_catnap(PZERO, error, "nfs_advlock");
4060             } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
4061                 && clidrev != 0) {
4062                 expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
4063                 retrycnt++;
4064             }
4065         } while (error == NFSERR_GRACE ||
4066             error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4067             error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
4068             error == NFSERR_BADSESSION ||
4069             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
4070              expireret == 0 && clidrev != 0 && retrycnt < 4));
4071         if (error && retrycnt >= 4)
4072                 error = EIO;
4073         return (error);
4074 }
4075
4076 /*
4077  * The lower level routine for the LockT case.
4078  */
4079 int
4080 nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
4081     struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
4082     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4083 {
4084         u_int32_t *tl;
4085         int error, type, size;
4086         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4087         struct nfsnode *np;
4088         struct nfsmount *nmp;
4089         struct nfsclsession *tsep;
4090
4091         nmp = VFSTONFS(vp->v_mount);
4092         NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp);
4093         NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4094         if (fl->l_type == F_RDLCK)
4095                 *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4096         else
4097                 *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4098         txdr_hyper(off, tl);
4099         tl += 2;
4100         txdr_hyper(len, tl);
4101         tl += 2;
4102         tsep = nfsmnt_mdssession(nmp);
4103         *tl++ = tsep->nfsess_clientid.lval[0];
4104         *tl = tsep->nfsess_clientid.lval[1];
4105         nfscl_filllockowner(id, own, flags);
4106         np = VTONFS(vp);
4107         NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
4108             np->n_fhp->nfh_len);
4109         (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
4110         error = nfscl_request(nd, vp, p, cred, NULL);
4111         if (error)
4112                 return (error);
4113         if (nd->nd_repstat == 0) {
4114                 fl->l_type = F_UNLCK;
4115         } else if (nd->nd_repstat == NFSERR_DENIED) {
4116                 nd->nd_repstat = 0;
4117                 fl->l_whence = SEEK_SET;
4118                 NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4119                 fl->l_start = fxdr_hyper(tl);
4120                 tl += 2;
4121                 len = fxdr_hyper(tl);
4122                 tl += 2;
4123                 if (len == NFS64BITSSET)
4124                         fl->l_len = 0;
4125                 else
4126                         fl->l_len = len;
4127                 type = fxdr_unsigned(int, *tl++);
4128                 if (type == NFSV4LOCKT_WRITE)
4129                         fl->l_type = F_WRLCK;
4130                 else
4131                         fl->l_type = F_RDLCK;
4132                 /*
4133                  * XXX For now, I have no idea what to do with the
4134                  * conflicting lock_owner, so I'll just set the pid == 0
4135                  * and skip over the lock_owner.
4136                  */
4137                 fl->l_pid = (pid_t)0;
4138                 tl += 2;
4139                 size = fxdr_unsigned(int, *tl);
4140                 if (size < 0 || size > NFSV4_OPAQUELIMIT)
4141                         error = EBADRPC;
4142                 if (!error)
4143                         error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4144         } else if (nd->nd_repstat == NFSERR_STALECLIENTID)
4145                 nfscl_initiate_recovery(clp);
4146 nfsmout:
4147         m_freem(nd->nd_mrep);
4148         return (error);
4149 }
4150
4151 /*
4152  * Lower level function that performs the LockU RPC.
4153  */
4154 static int
4155 nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
4156     struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
4157     u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
4158 {
4159         u_int32_t *tl;
4160         int error;
4161
4162         nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
4163             lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0);
4164         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
4165         *tl++ = txdr_unsigned(type);
4166         *tl = txdr_unsigned(lp->nfsl_seqid);
4167         if (nfstest_outofseq &&
4168             (arc4random() % nfstest_outofseq) == 0)
4169                 *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4170         tl++;
4171         if (NFSHASNFSV4N(nmp))
4172                 *tl++ = 0;
4173         else
4174                 *tl++ = lp->nfsl_stateid.seqid;
4175         *tl++ = lp->nfsl_stateid.other[0];
4176         *tl++ = lp->nfsl_stateid.other[1];
4177         *tl++ = lp->nfsl_stateid.other[2];
4178         txdr_hyper(off, tl);
4179         tl += 2;
4180         txdr_hyper(len, tl);
4181         if (syscred)
4182                 nd->nd_flag |= ND_USEGSSNAME;
4183         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4184             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4185         NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4186         if (error)
4187                 return (error);
4188         if (nd->nd_repstat == 0) {
4189                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4190                 lp->nfsl_stateid.seqid = *tl++;
4191                 lp->nfsl_stateid.other[0] = *tl++;
4192                 lp->nfsl_stateid.other[1] = *tl++;
4193                 lp->nfsl_stateid.other[2] = *tl;
4194         } else if (nd->nd_repstat == NFSERR_STALESTATEID)
4195                 nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4196 nfsmout:
4197         m_freem(nd->nd_mrep);
4198         return (error);
4199 }
4200
4201 /*
4202  * The actual Lock RPC.
4203  */
4204 int
4205 nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
4206     u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
4207     int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
4208     NFSPROC_T *p, int syscred)
4209 {
4210         u_int32_t *tl;
4211         int error, size;
4212         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4213         struct nfsclsession *tsep;
4214
4215         nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0);
4216         NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4217         if (type == F_RDLCK)
4218                 *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4219         else
4220                 *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4221         *tl++ = txdr_unsigned(reclaim);
4222         txdr_hyper(off, tl);
4223         tl += 2;
4224         txdr_hyper(len, tl);
4225         tl += 2;
4226         if (newone) {
4227             *tl = newnfs_true;
4228             NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
4229                 2 * NFSX_UNSIGNED + NFSX_HYPER);
4230             *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
4231             if (NFSHASNFSV4N(nmp))
4232                 *tl++ = 0;
4233             else
4234                 *tl++ = lp->nfsl_open->nfso_stateid.seqid;
4235             *tl++ = lp->nfsl_open->nfso_stateid.other[0];
4236             *tl++ = lp->nfsl_open->nfso_stateid.other[1];
4237             *tl++ = lp->nfsl_open->nfso_stateid.other[2];
4238             *tl++ = txdr_unsigned(lp->nfsl_seqid);
4239             tsep = nfsmnt_mdssession(nmp);
4240             *tl++ = tsep->nfsess_clientid.lval[0];
4241             *tl = tsep->nfsess_clientid.lval[1];
4242             NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4243             NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4244             (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4245         } else {
4246             *tl = newnfs_false;
4247             NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
4248             if (NFSHASNFSV4N(nmp))
4249                 *tl++ = 0;
4250             else
4251                 *tl++ = lp->nfsl_stateid.seqid;
4252             *tl++ = lp->nfsl_stateid.other[0];
4253             *tl++ = lp->nfsl_stateid.other[1];
4254             *tl++ = lp->nfsl_stateid.other[2];
4255             *tl = txdr_unsigned(lp->nfsl_seqid);
4256             if (nfstest_outofseq &&
4257                 (arc4random() % nfstest_outofseq) == 0)
4258                     *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4259         }
4260         if (syscred)
4261                 nd->nd_flag |= ND_USEGSSNAME;
4262         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4263             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4264         if (error)
4265                 return (error);
4266         if (newone)
4267             NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4268         NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4269         if (nd->nd_repstat == 0) {
4270                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4271                 lp->nfsl_stateid.seqid = *tl++;
4272                 lp->nfsl_stateid.other[0] = *tl++;
4273                 lp->nfsl_stateid.other[1] = *tl++;
4274                 lp->nfsl_stateid.other[2] = *tl;
4275         } else if (nd->nd_repstat == NFSERR_DENIED) {
4276                 NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4277                 size = fxdr_unsigned(int, *(tl + 7));
4278                 if (size < 0 || size > NFSV4_OPAQUELIMIT)
4279                         error = EBADRPC;
4280                 if (!error)
4281                         error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4282         } else if (nd->nd_repstat == NFSERR_STALESTATEID)
4283                 nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4284 nfsmout:
4285         m_freem(nd->nd_mrep);
4286         return (error);
4287 }
4288
4289 /*
4290  * nfs statfs rpc
4291  * (always called with the vp for the mount point)
4292  */
4293 int
4294 nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4295     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4296     void *stuff)
4297 {
4298         u_int32_t *tl = NULL;
4299         struct nfsrv_descript nfsd, *nd = &nfsd;
4300         struct nfsmount *nmp;
4301         nfsattrbit_t attrbits;
4302         int error;
4303
4304         *attrflagp = 0;
4305         nmp = VFSTONFS(vp->v_mount);
4306         if (NFSHASNFSV4(nmp)) {
4307                 /*
4308                  * For V4, you actually do a getattr.
4309                  */
4310                 NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4311                 NFSSTATFS_GETATTRBIT(&attrbits);
4312                 (void) nfsrv_putattrbit(nd, &attrbits);
4313                 nd->nd_flag |= ND_USEGSSNAME;
4314                 error = nfscl_request(nd, vp, p, cred, stuff);
4315                 if (error)
4316                         return (error);
4317                 if (nd->nd_repstat == 0) {
4318                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4319                             NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p,
4320                             cred);
4321                         if (!error) {
4322                                 nmp->nm_fsid[0] = nap->na_filesid[0];
4323                                 nmp->nm_fsid[1] = nap->na_filesid[1];
4324                                 NFSSETHASSETFSID(nmp);
4325                                 *attrflagp = 1;
4326                         }
4327                 } else {
4328                         error = nd->nd_repstat;
4329                 }
4330                 if (error)
4331                         goto nfsmout;
4332         } else {
4333                 NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp);
4334                 error = nfscl_request(nd, vp, p, cred, stuff);
4335                 if (error)
4336                         return (error);
4337                 if (nd->nd_flag & ND_NFSV3) {
4338                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4339                         if (error)
4340                                 goto nfsmout;
4341                 }
4342                 if (nd->nd_repstat) {
4343                         error = nd->nd_repstat;
4344                         goto nfsmout;
4345                 }
4346                 NFSM_DISSECT(tl, u_int32_t *,
4347                     NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4348         }
4349         if (NFSHASNFSV3(nmp)) {
4350                 sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4351                 sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4352                 sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4353                 sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4354                 sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4355                 sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4356                 sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4357         } else if (NFSHASNFSV4(nmp) == 0) {
4358                 sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4359                 sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4360                 sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4361                 sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4362                 sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4363         }
4364 nfsmout:
4365         m_freem(nd->nd_mrep);
4366         return (error);
4367 }
4368
4369 /*
4370  * nfs pathconf rpc
4371  */
4372 int
4373 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4374     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4375     void *stuff)
4376 {
4377         struct nfsrv_descript nfsd, *nd = &nfsd;
4378         struct nfsmount *nmp;
4379         u_int32_t *tl;
4380         nfsattrbit_t attrbits;
4381         int error;
4382
4383         *attrflagp = 0;
4384         nmp = VFSTONFS(vp->v_mount);
4385         if (NFSHASNFSV4(nmp)) {
4386                 /*
4387                  * For V4, you actually do a getattr.
4388                  */
4389                 NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4390                 NFSPATHCONF_GETATTRBIT(&attrbits);
4391                 (void) nfsrv_putattrbit(nd, &attrbits);
4392                 nd->nd_flag |= ND_USEGSSNAME;
4393                 error = nfscl_request(nd, vp, p, cred, stuff);
4394                 if (error)
4395                         return (error);
4396                 if (nd->nd_repstat == 0) {
4397                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4398                             pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4399                             cred);
4400                         if (!error)
4401                                 *attrflagp = 1;
4402                 } else {
4403                         error = nd->nd_repstat;
4404                 }
4405         } else {
4406                 NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp);
4407                 error = nfscl_request(nd, vp, p, cred, stuff);
4408                 if (error)
4409                         return (error);
4410                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4411                 if (nd->nd_repstat && !error)
4412                         error = nd->nd_repstat;
4413                 if (!error) {
4414                         NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4415                         pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4416                         pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4417                         pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4418                         pc->pc_chownrestricted =
4419                             fxdr_unsigned(u_int32_t, *tl++);
4420                         pc->pc_caseinsensitive =
4421                             fxdr_unsigned(u_int32_t, *tl++);
4422                         pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4423                 }
4424         }
4425 nfsmout:
4426         m_freem(nd->nd_mrep);
4427         return (error);
4428 }
4429
4430 /*
4431  * nfs version 3 fsinfo rpc call
4432  */
4433 int
4434 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4435     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
4436 {
4437         u_int32_t *tl;
4438         struct nfsrv_descript nfsd, *nd = &nfsd;
4439         int error;
4440
4441         *attrflagp = 0;
4442         NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp);
4443         error = nfscl_request(nd, vp, p, cred, stuff);
4444         if (error)
4445                 return (error);
4446         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4447         if (nd->nd_repstat && !error)
4448                 error = nd->nd_repstat;
4449         if (!error) {
4450                 NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4451                 fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4452                 fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4453                 fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4454                 fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4455                 fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4456                 fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4457                 fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4458                 fsp->fs_maxfilesize = fxdr_hyper(tl);
4459                 tl += 2;
4460                 fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4461                 tl += 2;
4462                 fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4463         }
4464 nfsmout:
4465         m_freem(nd->nd_mrep);
4466         return (error);
4467 }
4468
4469 /*
4470  * This function performs the Renew RPC.
4471  */
4472 int
4473 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4474     NFSPROC_T *p)
4475 {
4476         u_int32_t *tl;
4477         struct nfsrv_descript nfsd;
4478         struct nfsrv_descript *nd = &nfsd;
4479         struct nfsmount *nmp;
4480         int error;
4481         struct nfssockreq *nrp;
4482         struct nfsclsession *tsep;
4483
4484         nmp = clp->nfsc_nmp;
4485         if (nmp == NULL)
4486                 return (0);
4487         if (dsp == NULL)
4488                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
4489                     0);
4490         else
4491                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4492                     &dsp->nfsclds_sess, 0, 0);
4493         if (!NFSHASNFSV4N(nmp)) {
4494                 /* NFSv4.1 just uses a Sequence Op and not a Renew. */
4495                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4496                 tsep = nfsmnt_mdssession(nmp);
4497                 *tl++ = tsep->nfsess_clientid.lval[0];
4498                 *tl = tsep->nfsess_clientid.lval[1];
4499         }
4500         nrp = NULL;
4501         if (dsp != NULL)
4502                 nrp = dsp->nfsclds_sockp;
4503         if (nrp == NULL)
4504                 /* If NULL, use the MDS socket. */
4505                 nrp = &nmp->nm_sockreq;
4506         nd->nd_flag |= ND_USEGSSNAME;
4507         if (dsp == NULL)
4508                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4509                     NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4510         else {
4511                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4512                     NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4513                 if (error == ENXIO)
4514                         nfscl_cancelreqs(dsp);
4515         }
4516         if (error)
4517                 return (error);
4518         error = nd->nd_repstat;
4519         m_freem(nd->nd_mrep);
4520         return (error);
4521 }
4522
4523 /*
4524  * This function performs the Releaselockowner RPC.
4525  */
4526 int
4527 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4528     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4529 {
4530         struct nfsrv_descript nfsd, *nd = &nfsd;
4531         u_int32_t *tl;
4532         int error;
4533         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4534         struct nfsclsession *tsep;
4535
4536         if (NFSHASNFSV4N(nmp)) {
4537                 /* For NFSv4.1, do a FreeStateID. */
4538                 nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4539                     NULL, 0, 0);
4540                 nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4541         } else {
4542                 nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4543                     NULL, 0, 0);
4544                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4545                 tsep = nfsmnt_mdssession(nmp);
4546                 *tl++ = tsep->nfsess_clientid.lval[0];
4547                 *tl = tsep->nfsess_clientid.lval[1];
4548                 NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4549                 NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4550                 (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4551         }
4552         nd->nd_flag |= ND_USEGSSNAME;
4553         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4554             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4555         if (error)
4556                 return (error);
4557         error = nd->nd_repstat;
4558         m_freem(nd->nd_mrep);
4559         return (error);
4560 }
4561
4562 /*
4563  * This function performs the Compound to get the mount pt FH.
4564  */
4565 int
4566 nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
4567     NFSPROC_T *p)
4568 {
4569         u_int32_t *tl;
4570         struct nfsrv_descript nfsd;
4571         struct nfsrv_descript *nd = &nfsd;
4572         u_char *cp, *cp2;
4573         int error, cnt, len, setnil;
4574         u_int32_t *opcntp;
4575
4576         nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
4577             0);
4578         cp = dirpath;
4579         cnt = 0;
4580         do {
4581                 setnil = 0;
4582                 while (*cp == '/')
4583                         cp++;
4584                 cp2 = cp;
4585                 while (*cp2 != '\0' && *cp2 != '/')
4586                         cp2++;
4587                 if (*cp2 == '/') {
4588                         setnil = 1;
4589                         *cp2 = '\0';
4590                 }
4591                 if (cp2 != cp) {
4592                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4593                         *tl = txdr_unsigned(NFSV4OP_LOOKUP);
4594                         nfsm_strtom(nd, cp, strlen(cp));
4595                         cnt++;
4596                 }
4597                 if (setnil)
4598                         *cp2++ = '/';
4599                 cp = cp2;
4600         } while (*cp != '\0');
4601         if (NFSHASNFSV4N(nmp))
4602                 /* Has a Sequence Op done by nfscl_reqstart(). */
4603                 *opcntp = txdr_unsigned(3 + cnt);
4604         else
4605                 *opcntp = txdr_unsigned(2 + cnt);
4606         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4607         *tl = txdr_unsigned(NFSV4OP_GETFH);
4608         nd->nd_flag |= ND_USEGSSNAME;
4609         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4610                 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4611         if (error)
4612                 return (error);
4613         if (nd->nd_repstat == 0) {
4614                 NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
4615                 tl += (2 + 2 * cnt);
4616                 if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
4617                         len > NFSX_FHMAX) {
4618                         nd->nd_repstat = NFSERR_BADXDR;
4619                 } else {
4620                         nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len);
4621                         if (nd->nd_repstat == 0)
4622                                 nmp->nm_fhsize = len;
4623                 }
4624         }
4625         error = nd->nd_repstat;
4626 nfsmout:
4627         m_freem(nd->nd_mrep);
4628         return (error);
4629 }
4630
4631 /*
4632  * This function performs the Delegreturn RPC.
4633  */
4634 int
4635 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
4636     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
4637 {
4638         u_int32_t *tl;
4639         struct nfsrv_descript nfsd;
4640         struct nfsrv_descript *nd = &nfsd;
4641         int error;
4642
4643         nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
4644             dp->nfsdl_fhlen, NULL, NULL, 0, 0);
4645         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
4646         if (NFSHASNFSV4N(nmp))
4647                 *tl++ = 0;
4648         else
4649                 *tl++ = dp->nfsdl_stateid.seqid;
4650         *tl++ = dp->nfsdl_stateid.other[0];
4651         *tl++ = dp->nfsdl_stateid.other[1];
4652         *tl = dp->nfsdl_stateid.other[2];
4653         if (syscred)
4654                 nd->nd_flag |= ND_USEGSSNAME;
4655         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4656             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4657         if (error)
4658                 return (error);
4659         error = nd->nd_repstat;
4660         m_freem(nd->nd_mrep);
4661         return (error);
4662 }
4663
4664 /*
4665  * nfs getacl call.
4666  */
4667 int
4668 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4669     struct acl *aclp, void *stuff)
4670 {
4671         struct nfsrv_descript nfsd, *nd = &nfsd;
4672         int error;
4673         nfsattrbit_t attrbits;
4674         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4675
4676         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4677                 return (EOPNOTSUPP);
4678         NFSCL_REQSTART(nd, NFSPROC_GETACL, vp);
4679         NFSZERO_ATTRBIT(&attrbits);
4680         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4681         (void) nfsrv_putattrbit(nd, &attrbits);
4682         error = nfscl_request(nd, vp, p, cred, stuff);
4683         if (error)
4684                 return (error);
4685         if (!nd->nd_repstat)
4686                 error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
4687                     NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
4688         else
4689                 error = nd->nd_repstat;
4690         m_freem(nd->nd_mrep);
4691         return (error);
4692 }
4693
4694 /*
4695  * nfs setacl call.
4696  */
4697 int
4698 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4699     struct acl *aclp, void *stuff)
4700 {
4701         int error;
4702         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4703
4704         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4705                 return (EOPNOTSUPP);
4706         error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff);
4707         return (error);
4708 }
4709
4710 /*
4711  * nfs setacl call.
4712  */
4713 static int
4714 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4715     struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff)
4716 {
4717         struct nfsrv_descript nfsd, *nd = &nfsd;
4718         int error;
4719         nfsattrbit_t attrbits;
4720         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4721
4722         if (!NFSHASNFSV4(nmp))
4723                 return (EOPNOTSUPP);
4724         NFSCL_REQSTART(nd, NFSPROC_SETACL, vp);
4725         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
4726         NFSZERO_ATTRBIT(&attrbits);
4727         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4728         (void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0,
4729             &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
4730         error = nfscl_request(nd, vp, p, cred, stuff);
4731         if (error)
4732                 return (error);
4733         /* Don't care about the pre/postop attributes */
4734         m_freem(nd->nd_mrep);
4735         return (nd->nd_repstat);
4736 }
4737
4738 /*
4739  * Do the NFSv4.1 Exchange ID.
4740  */
4741 int
4742 nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
4743     struct nfssockreq *nrp, int minorvers, uint32_t exchflags,
4744     struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p)
4745 {
4746         uint32_t *tl, v41flags;
4747         struct nfsrv_descript nfsd;
4748         struct nfsrv_descript *nd = &nfsd;
4749         struct nfsclds *dsp;
4750         struct timespec verstime;
4751         int error, len;
4752
4753         *dspp = NULL;
4754         if (minorvers == 0)
4755                 minorvers = nmp->nm_minorvers;
4756         nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL,
4757             NFS_VER4, minorvers);
4758         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4759         *tl++ = txdr_unsigned(nfsboottime.tv_sec);      /* Client owner */
4760         *tl = txdr_unsigned(clp->nfsc_rev);
4761         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
4762
4763         NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
4764         *tl++ = txdr_unsigned(exchflags);
4765         *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
4766
4767         /* Set the implementation id4 */
4768         *tl = txdr_unsigned(1);
4769         (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
4770         (void) nfsm_strtom(nd, version, strlen(version));
4771         NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
4772         verstime.tv_sec = 1293840000;           /* Jan 1, 2011 */
4773         verstime.tv_nsec = 0;
4774         txdr_nfsv4time(&verstime, tl);
4775         nd->nd_flag |= ND_USEGSSNAME;
4776         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4777             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4778         NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
4779             (int)nd->nd_repstat);
4780         if (error != 0)
4781                 return (error);
4782         if (nd->nd_repstat == 0) {
4783                 NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
4784                 len = fxdr_unsigned(int, *(tl + 7));
4785                 if (len < 0 || len > NFSV4_OPAQUELIMIT) {
4786                         error = NFSERR_BADXDR;
4787                         goto nfsmout;
4788                 }
4789                 dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
4790                     M_WAITOK | M_ZERO);
4791                 dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
4792                 dsp->nfsclds_servownlen = len;
4793                 dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
4794                 dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
4795                 dsp->nfsclds_sess.nfsess_sequenceid =
4796                     fxdr_unsigned(uint32_t, *tl++);
4797                 v41flags = fxdr_unsigned(uint32_t, *tl);
4798                 if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
4799                     NFSHASPNFSOPT(nmp)) {
4800                         NFSCL_DEBUG(1, "set PNFS\n");
4801                         NFSLOCKMNT(nmp);
4802                         nmp->nm_state |= NFSSTA_PNFS;
4803                         NFSUNLOCKMNT(nmp);
4804                         dsp->nfsclds_flags |= NFSCLDS_MDS;
4805                 }
4806                 if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
4807                         dsp->nfsclds_flags |= NFSCLDS_DS;
4808                 if (minorvers == NFSV42_MINORVERSION)
4809                         dsp->nfsclds_flags |= NFSCLDS_MINORV2;
4810                 if (len > 0)
4811                         nd->nd_repstat = nfsrv_mtostr(nd,
4812                             dsp->nfsclds_serverown, len);
4813                 if (nd->nd_repstat == 0) {
4814                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
4815                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
4816                             NULL, MTX_DEF);
4817                         nfscl_initsessionslots(&dsp->nfsclds_sess);
4818                         *dspp = dsp;
4819                 } else
4820                         free(dsp, M_NFSCLDS);
4821         }
4822         error = nd->nd_repstat;
4823 nfsmout:
4824         m_freem(nd->nd_mrep);
4825         return (error);
4826 }
4827
4828 /*
4829  * Do the NFSv4.1 Create Session.
4830  */
4831 int
4832 nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
4833     struct nfssockreq *nrp, struct nfsclds *dsp, uint32_t sequenceid, int mds,
4834     struct ucred *cred, NFSPROC_T *p)
4835 {
4836         uint32_t crflags, maxval, *tl;
4837         struct nfsrv_descript nfsd;
4838         struct nfsrv_descript *nd = &nfsd;
4839         int error, irdcnt, minorvers;
4840
4841         /* Make sure nm_rsize, nm_wsize is set. */
4842         if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
4843                 nmp->nm_rsize = NFS_MAXBSIZE;
4844         if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
4845                 nmp->nm_wsize = NFS_MAXBSIZE;
4846         if (dsp == NULL)
4847                 minorvers = nmp->nm_minorvers;
4848         else if ((dsp->nfsclds_flags & NFSCLDS_MINORV2) != 0)
4849                 minorvers = NFSV42_MINORVERSION;
4850         else
4851                 minorvers = NFSV41_MINORVERSION;
4852         nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL,
4853             NFS_VER4, minorvers);
4854         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4855         *tl++ = sep->nfsess_clientid.lval[0];
4856         *tl++ = sep->nfsess_clientid.lval[1];
4857         *tl++ = txdr_unsigned(sequenceid);
4858         crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
4859         if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
4860                 crflags |= NFSV4CRSESS_CONNBACKCHAN;
4861         *tl = txdr_unsigned(crflags);
4862
4863         /* Fill in fore channel attributes. */
4864         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4865         *tl++ = 0;                              /* Header pad size */
4866         if ((nd->nd_flag & ND_NFSV42) != 0 && mds != 0 && sb_max_adj >=
4867             nmp->nm_wsize && sb_max_adj >= nmp->nm_rsize) {
4868                 /*
4869                  * NFSv4.2 Extended Attribute operations may want to do
4870                  * requests/replies that are larger than nm_rsize/nm_wsize.
4871                  */
4872                 *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
4873                 *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
4874         } else {
4875                 *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);
4876                 *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);
4877         }
4878         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
4879         *tl++ = txdr_unsigned(20);              /* Max operations */
4880         *tl++ = txdr_unsigned(64);              /* Max slots */
4881         *tl = 0;                                /* No rdma ird */
4882
4883         /* Fill in back channel attributes. */
4884         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4885         *tl++ = 0;                              /* Header pad size */
4886         *tl++ = txdr_unsigned(10000);           /* Max request size */
4887         *tl++ = txdr_unsigned(10000);           /* Max response size */
4888         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
4889         *tl++ = txdr_unsigned(4);               /* Max operations */
4890         *tl++ = txdr_unsigned(NFSV4_CBSLOTS);   /* Max slots */
4891         *tl = 0;                                /* No rdma ird */
4892
4893         NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
4894         *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */
4895
4896         /* Allow AUTH_SYS callbacks as uid, gid == 0. */
4897         *tl++ = txdr_unsigned(1);               /* Auth_sys only */
4898         *tl++ = txdr_unsigned(AUTH_SYS);        /* AUTH_SYS type */
4899         *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
4900         *tl++ = 0;                              /* Null machine name */
4901         *tl++ = 0;                              /* Uid == 0 */
4902         *tl++ = 0;                              /* Gid == 0 */
4903         *tl = 0;                                /* No additional gids */
4904         nd->nd_flag |= ND_USEGSSNAME;
4905         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
4906             NFS_VER4, NULL, 1, NULL, NULL);
4907         if (error != 0)
4908                 return (error);
4909         if (nd->nd_repstat == 0) {
4910                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
4911                     2 * NFSX_UNSIGNED);
4912                 bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
4913                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
4914                 sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
4915                 crflags = fxdr_unsigned(uint32_t, *tl);
4916                 if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
4917                         NFSLOCKMNT(nmp);
4918                         nmp->nm_state |= NFSSTA_SESSPERSIST;
4919                         NFSUNLOCKMNT(nmp);
4920                 }
4921
4922                 /* Get the fore channel slot count. */
4923                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4924                 tl++;                   /* Skip the header pad size. */
4925
4926                 /* Make sure nm_wsize is small enough. */
4927                 maxval = fxdr_unsigned(uint32_t, *tl++);
4928                 while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
4929                         if (nmp->nm_wsize > 8096)
4930                                 nmp->nm_wsize /= 2;
4931                         else
4932                                 break;
4933                 }
4934                 sep->nfsess_maxreq = maxval;
4935
4936                 /* Make sure nm_rsize is small enough. */
4937                 maxval = fxdr_unsigned(uint32_t, *tl++);
4938                 while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
4939                         if (nmp->nm_rsize > 8096)
4940                                 nmp->nm_rsize /= 2;
4941                         else
4942                                 break;
4943                 }
4944                 sep->nfsess_maxresp = maxval;
4945
4946                 sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
4947                 tl++;
4948                 sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
4949                 NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
4950                 irdcnt = fxdr_unsigned(int, *tl);
4951                 if (irdcnt > 0)
4952                         NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
4953
4954                 /* and the back channel slot count. */
4955                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4956                 tl += 5;
4957                 sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
4958                 NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
4959         }
4960         error = nd->nd_repstat;
4961 nfsmout:
4962         m_freem(nd->nd_mrep);
4963         return (error);
4964 }
4965
4966 /*
4967  * Do the NFSv4.1 Destroy Session.
4968  */
4969 int
4970 nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp,
4971     struct ucred *cred, NFSPROC_T *p)
4972 {
4973         uint32_t *tl;
4974         struct nfsrv_descript nfsd;
4975         struct nfsrv_descript *nd = &nfsd;
4976         int error;
4977         struct nfsclsession *tsep;
4978
4979         nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL, 0,
4980             0);
4981         NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
4982         tsep = nfsmnt_mdssession(nmp);
4983         bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID);
4984         nd->nd_flag |= ND_USEGSSNAME;
4985         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4986             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4987         if (error != 0)
4988                 return (error);
4989         error = nd->nd_repstat;
4990         m_freem(nd->nd_mrep);
4991         return (error);
4992 }
4993
4994 /*
4995  * Do the NFSv4.1 Destroy Client.
4996  */
4997 int
4998 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
4999     struct ucred *cred, NFSPROC_T *p)
5000 {
5001         uint32_t *tl;
5002         struct nfsrv_descript nfsd;
5003         struct nfsrv_descript *nd = &nfsd;
5004         int error;
5005         struct nfsclsession *tsep;
5006
5007         nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0,
5008             0);
5009         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5010         tsep = nfsmnt_mdssession(nmp);
5011         *tl++ = tsep->nfsess_clientid.lval[0];
5012         *tl = tsep->nfsess_clientid.lval[1];
5013         nd->nd_flag |= ND_USEGSSNAME;
5014         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5015             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5016         if (error != 0)
5017                 return (error);
5018         error = nd->nd_repstat;
5019         m_freem(nd->nd_mrep);
5020         return (error);
5021 }
5022
5023 /*
5024  * Do the NFSv4.1 LayoutGet.
5025  */
5026 static int
5027 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
5028     uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
5029     int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
5030     struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p,
5031     void *stuff)
5032 {
5033         struct nfsrv_descript nfsd, *nd = &nfsd;
5034         int error;
5035
5036         nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
5037             0);
5038         nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
5039             layouttype, layoutlen, 0);
5040         nd->nd_flag |= ND_USEGSSNAME;
5041         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5042             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5043         NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
5044         if (error != 0)
5045                 return (error);
5046         if (nd->nd_repstat == 0)
5047                 error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep,
5048                     flhp);
5049         if (error == 0 && nd->nd_repstat != 0)
5050                 error = nd->nd_repstat;
5051         m_freem(nd->nd_mrep);
5052         return (error);
5053 }
5054
5055 /*
5056  * Do the NFSv4.1 Get Device Info.
5057  */
5058 int
5059 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
5060     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
5061     NFSPROC_T *p)
5062 {
5063         uint32_t cnt, *tl, vers, minorvers;
5064         struct nfsrv_descript nfsd;
5065         struct nfsrv_descript *nd = &nfsd;
5066         struct sockaddr_in sin, ssin;
5067         struct sockaddr_in6 sin6, ssin6;
5068         struct nfsclds *dsp = NULL, **dspp, **gotdspp;
5069         struct nfscldevinfo *ndi;
5070         int addrcnt = 0, bitcnt, error, gotminor, gotvers, i, isudp, j;
5071         int stripecnt;
5072         uint8_t stripeindex;
5073         sa_family_t af, safilled;
5074
5075         ssin.sin_port = 0;              /* To shut up compiler. */
5076         ssin.sin_addr.s_addr = 0;       /* ditto */
5077         *ndip = NULL;
5078         ndi = NULL;
5079         gotdspp = NULL;
5080         nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0,
5081             0);
5082         NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5083         NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
5084         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5085         *tl++ = txdr_unsigned(layouttype);
5086         *tl++ = txdr_unsigned(100000);
5087         if (notifybitsp != NULL && *notifybitsp != 0) {
5088                 *tl = txdr_unsigned(1);         /* One word of bits. */
5089                 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5090                 *tl = txdr_unsigned(*notifybitsp);
5091         } else
5092                 *tl = txdr_unsigned(0);
5093         nd->nd_flag |= ND_USEGSSNAME;
5094         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5095             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5096         if (error != 0)
5097                 return (error);
5098         if (nd->nd_repstat == 0) {
5099                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5100                 if (layouttype != fxdr_unsigned(int, *tl))
5101                         printf("EEK! devinfo layout type not same!\n");
5102                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
5103                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5104                         stripecnt = fxdr_unsigned(int, *tl);
5105                         NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
5106                         if (stripecnt < 1 || stripecnt > 4096) {
5107                                 printf("pNFS File layout devinfo stripecnt %d:"
5108                                     " out of range\n", stripecnt);
5109                                 error = NFSERR_BADXDR;
5110                                 goto nfsmout;
5111                         }
5112                         NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
5113                             NFSX_UNSIGNED);
5114                         addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
5115                         NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
5116                         if (addrcnt < 1 || addrcnt > 128) {
5117                                 printf("NFS devinfo addrcnt %d: out of range\n",
5118                                     addrcnt);
5119                                 error = NFSERR_BADXDR;
5120                                 goto nfsmout;
5121                         }
5122
5123                         /*
5124                          * Now we know how many stripe indices and addresses, so
5125                          * we can allocate the structure the correct size.
5126                          */
5127                         i = (stripecnt * sizeof(uint8_t)) /
5128                             sizeof(struct nfsclds *) + 1;
5129                         NFSCL_DEBUG(4, "stripeindices=%d\n", i);
5130                         ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
5131                             sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
5132                             M_ZERO);
5133                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5134                             NFSX_V4DEVICEID);
5135                         ndi->nfsdi_refcnt = 0;
5136                         ndi->nfsdi_flags = NFSDI_FILELAYOUT;
5137                         ndi->nfsdi_stripecnt = stripecnt;
5138                         ndi->nfsdi_addrcnt = addrcnt;
5139                         /* Fill in the stripe indices. */
5140                         for (i = 0; i < stripecnt; i++) {
5141                                 stripeindex = fxdr_unsigned(uint8_t, *tl++);
5142                                 NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
5143                                 if (stripeindex >= addrcnt) {
5144                                         printf("pNFS File Layout devinfo"
5145                                             " stripeindex %d: too big\n",
5146                                             (int)stripeindex);
5147                                         error = NFSERR_BADXDR;
5148                                         goto nfsmout;
5149                                 }
5150                                 nfsfldi_setstripeindex(ndi, i, stripeindex);
5151                         }
5152                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
5153                         /* For Flex File, we only get one address list. */
5154                         ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
5155                             M_NFSDEVINFO, M_WAITOK | M_ZERO);
5156                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5157                             NFSX_V4DEVICEID);
5158                         ndi->nfsdi_refcnt = 0;
5159                         ndi->nfsdi_flags = NFSDI_FLEXFILE;
5160                         addrcnt = ndi->nfsdi_addrcnt = 1;
5161                 }
5162
5163                 /* Now, dissect the server address(es). */
5164                 safilled = AF_UNSPEC;
5165                 for (i = 0; i < addrcnt; i++) {
5166                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5167                         cnt = fxdr_unsigned(uint32_t, *tl);
5168                         if (cnt == 0) {
5169                                 printf("NFS devinfo 0 len addrlist\n");
5170                                 error = NFSERR_BADXDR;
5171                                 goto nfsmout;
5172                         }
5173                         dspp = nfsfldi_addr(ndi, i);
5174                         safilled = AF_UNSPEC;
5175                         for (j = 0; j < cnt; j++) {
5176                                 error = nfsv4_getipaddr(nd, &sin, &sin6, &af,
5177                                     &isudp);
5178                                 if (error != 0 && error != EPERM) {
5179                                         error = NFSERR_BADXDR;
5180                                         goto nfsmout;
5181                                 }
5182                                 if (error == 0 && isudp == 0) {
5183                                         /*
5184                                          * The priority is:
5185                                          * - Same address family.
5186                                          * Save the address and dspp, so that
5187                                          * the connection can be done after
5188                                          * parsing is complete.
5189                                          */
5190                                         if (safilled == AF_UNSPEC ||
5191                                             (af == nmp->nm_nam->sa_family &&
5192                                              safilled != nmp->nm_nam->sa_family)
5193                                            ) {
5194                                                 if (af == AF_INET)
5195                                                         ssin = sin;
5196                                                 else
5197                                                         ssin6 = sin6;
5198                                                 safilled = af;
5199                                                 gotdspp = dspp;
5200                                         }
5201                                 }
5202                         }
5203                 }
5204
5205                 gotvers = NFS_VER4;     /* Default NFSv4.1 for File Layout. */
5206                 gotminor = NFSV41_MINORVERSION;
5207                 /* For Flex File, we will take one of the versions to use. */
5208                 if (layouttype == NFSLAYOUT_FLEXFILE) {
5209                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5210                         j = fxdr_unsigned(int, *tl);
5211                         if (j < 1 || j > NFSDEV_MAXVERS) {
5212                                 printf("pNFS: too many versions\n");
5213                                 error = NFSERR_BADXDR;
5214                                 goto nfsmout;
5215                         }
5216                         gotvers = 0;
5217                         gotminor = 0;
5218                         for (i = 0; i < j; i++) {
5219                                 NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
5220                                 vers = fxdr_unsigned(uint32_t, *tl++);
5221                                 minorvers = fxdr_unsigned(uint32_t, *tl++);
5222                                 if (vers == NFS_VER3)
5223                                         minorvers = 0;
5224                                 if ((vers == NFS_VER4 && ((minorvers ==
5225                                     NFSV41_MINORVERSION && gotminor == 0) ||
5226                                     minorvers == NFSV42_MINORVERSION)) ||
5227                                     (vers == NFS_VER3 && gotvers == 0)) {
5228                                         gotvers = vers;
5229                                         gotminor = minorvers;
5230                                         /* We'll take this one. */
5231                                         ndi->nfsdi_versindex = i;
5232                                         ndi->nfsdi_vers = vers;
5233                                         ndi->nfsdi_minorvers = minorvers;
5234                                         ndi->nfsdi_rsize = fxdr_unsigned(
5235                                             uint32_t, *tl++);
5236                                         ndi->nfsdi_wsize = fxdr_unsigned(
5237                                             uint32_t, *tl++);
5238                                         if (*tl == newnfs_true)
5239                                                 ndi->nfsdi_flags |=
5240                                                     NFSDI_TIGHTCOUPLED;
5241                                         else
5242                                                 ndi->nfsdi_flags &=
5243                                                     ~NFSDI_TIGHTCOUPLED;
5244                                 }
5245                         }
5246                         if (gotvers == 0) {
5247                                 printf("pNFS: no NFSv3, NFSv4.1 or NFSv4.2\n");
5248                                 error = NFSERR_BADXDR;
5249                                 goto nfsmout;
5250                         }
5251                 }
5252
5253                 /* And the notify bits. */
5254                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5255                 bitcnt = fxdr_unsigned(int, *tl);
5256                 if (bitcnt > 0) {
5257                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5258                         if (notifybitsp != NULL)
5259                                 *notifybitsp =
5260                                     fxdr_unsigned(uint32_t, *tl);
5261                 }
5262                 if (safilled != AF_UNSPEC) {
5263                         KASSERT(ndi != NULL, ("ndi is NULL"));
5264                         *ndip = ndi;
5265                 } else
5266                         error = EPERM;
5267                 if (error == 0) {
5268                         /*
5269                          * Now we can do a TCP connection for the correct
5270                          * NFS version and IP address.
5271                          */
5272                         error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled,
5273                             gotvers, gotminor, &dsp, p);
5274                 }
5275                 if (error == 0) {
5276                         KASSERT(gotdspp != NULL, ("gotdspp is NULL"));
5277                         *gotdspp = dsp;
5278                 }
5279         }
5280         if (nd->nd_repstat != 0 && error == 0)
5281                 error = nd->nd_repstat;
5282 nfsmout:
5283         if (error != 0 && ndi != NULL)
5284                 nfscl_freedevinfo(ndi);
5285         m_freem(nd->nd_mrep);
5286         return (error);
5287 }
5288
5289 /*
5290  * Do the NFSv4.1 LayoutCommit.
5291  */
5292 int
5293 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5294     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5295     int layouttype, struct ucred *cred, NFSPROC_T *p, void *stuff)
5296 {
5297         uint32_t *tl;
5298         struct nfsrv_descript nfsd, *nd = &nfsd;
5299         int error;
5300
5301         nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL,
5302             0, 0);
5303         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5304             NFSX_STATEID);
5305         txdr_hyper(off, tl);
5306         tl += 2;
5307         txdr_hyper(len, tl);
5308         tl += 2;
5309         if (reclaim != 0)
5310                 *tl++ = newnfs_true;
5311         else
5312                 *tl++ = newnfs_false;
5313         *tl++ = txdr_unsigned(stateidp->seqid);
5314         *tl++ = stateidp->other[0];
5315         *tl++ = stateidp->other[1];
5316         *tl++ = stateidp->other[2];
5317         *tl++ = newnfs_true;
5318         if (lastbyte < off)
5319                 lastbyte = off;
5320         else if (lastbyte >= (off + len))
5321                 lastbyte = off + len - 1;
5322         txdr_hyper(lastbyte, tl);
5323         tl += 2;
5324         *tl++ = newnfs_false;
5325         *tl++ = txdr_unsigned(layouttype);
5326         /* All supported layouts are 0 length. */
5327         *tl = txdr_unsigned(0);
5328         nd->nd_flag |= ND_USEGSSNAME;
5329         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5330             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5331         if (error != 0)
5332                 return (error);
5333         error = nd->nd_repstat;
5334         m_freem(nd->nd_mrep);
5335         return (error);
5336 }
5337
5338 /*
5339  * Do the NFSv4.1 LayoutReturn.
5340  */
5341 int
5342 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5343     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5344     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5345     uint32_t stat, uint32_t op, char *devid)
5346 {
5347         uint32_t *tl;
5348         struct nfsrv_descript nfsd, *nd = &nfsd;
5349         uint64_t tu64;
5350         int error;
5351
5352         nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL,
5353             0, 0);
5354         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5355         if (reclaim != 0)
5356                 *tl++ = newnfs_true;
5357         else
5358                 *tl++ = newnfs_false;
5359         *tl++ = txdr_unsigned(layouttype);
5360         *tl++ = txdr_unsigned(iomode);
5361         *tl = txdr_unsigned(layoutreturn);
5362         if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5363                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5364                     NFSX_UNSIGNED);
5365                 txdr_hyper(offset, tl);
5366                 tl += 2;
5367                 txdr_hyper(len, tl);
5368                 tl += 2;
5369                 NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5370                 *tl++ = txdr_unsigned(stateidp->seqid);
5371                 *tl++ = stateidp->other[0];
5372                 *tl++ = stateidp->other[1];
5373                 *tl++ = stateidp->other[2];
5374                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
5375                         *tl = txdr_unsigned(0);
5376                 else if (layouttype == NFSLAYOUT_FLEXFILE) {
5377                         if (stat != 0) {
5378                                 *tl = txdr_unsigned(2 * NFSX_HYPER +
5379                                     NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5380                                     NFSX_UNSIGNED);
5381                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER +
5382                                     NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5383                                     NFSX_UNSIGNED);
5384                                 *tl++ = txdr_unsigned(1);       /* One error. */
5385                                 tu64 = 0;                       /* Offset. */
5386                                 txdr_hyper(tu64, tl); tl += 2;
5387                                 tu64 = UINT64_MAX;              /* Length. */
5388                                 txdr_hyper(tu64, tl); tl += 2;
5389                                 NFSBCOPY(stateidp, tl, NFSX_STATEID);
5390                                 tl += (NFSX_STATEID / NFSX_UNSIGNED);
5391                                 *tl++ = txdr_unsigned(1);       /* One error. */
5392                                 NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5393                                 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5394                                 *tl++ = txdr_unsigned(stat);
5395                                 *tl++ = txdr_unsigned(op);
5396                         } else {
5397                                 *tl = txdr_unsigned(2 * NFSX_UNSIGNED);
5398                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5399                                 /* No ioerrs. */
5400                                 *tl++ = 0;
5401                         }
5402                         *tl = 0;        /* No stats yet. */
5403                 }
5404         }
5405         nd->nd_flag |= ND_USEGSSNAME;
5406         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5407             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5408         if (error != 0)
5409                 return (error);
5410         if (nd->nd_repstat == 0) {
5411                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5412                 if (*tl != 0) {
5413                         NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5414                         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5415                         stateidp->other[0] = *tl++;
5416                         stateidp->other[1] = *tl++;
5417                         stateidp->other[2] = *tl;
5418                 }
5419         } else
5420                 error = nd->nd_repstat;
5421 nfsmout:
5422         m_freem(nd->nd_mrep);
5423         return (error);
5424 }
5425
5426 /*
5427  * Acquire a layout and devinfo, if possible. The caller must have acquired
5428  * a reference count on the nfsclclient structure before calling this.
5429  * Return the layout in lypp with a reference count on it, if successful.
5430  */
5431 static int
5432 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5433     int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
5434     struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5435 {
5436         struct nfscllayout *lyp;
5437         struct nfsclflayout *flp;
5438         struct nfsclflayouthead flh;
5439         int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
5440         nfsv4stateid_t stateid;
5441         struct nfsclsession *tsep;
5442
5443         *lypp = NULL;
5444         if (NFSHASFLEXFILE(nmp))
5445                 layouttype = NFSLAYOUT_FLEXFILE;
5446         else
5447                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5448         /*
5449          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5450          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5451          * flp == NULL.
5452          */
5453         lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5454             off, &flp, &recalled);
5455         islocked = 0;
5456         if (lyp == NULL || flp == NULL) {
5457                 if (recalled != 0)
5458                         return (EIO);
5459                 LIST_INIT(&flh);
5460                 tsep = nfsmnt_mdssession(nmp);
5461                 layoutlen = tsep->nfsess_maxcache -
5462                     (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5463                 if (lyp == NULL) {
5464                         stateid.seqid = 0;
5465                         stateid.other[0] = stateidp->other[0];
5466                         stateid.other[1] = stateidp->other[1];
5467                         stateid.other[2] = stateidp->other[2];
5468                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5469                             nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5470                             (uint64_t)0, layouttype, layoutlen, &stateid,
5471                             &retonclose, &flh, cred, p, NULL);
5472                 } else {
5473                         islocked = 1;
5474                         stateid.seqid = lyp->nfsly_stateid.seqid;
5475                         stateid.other[0] = lyp->nfsly_stateid.other[0];
5476                         stateid.other[1] = lyp->nfsly_stateid.other[1];
5477                         stateid.other[2] = lyp->nfsly_stateid.other[2];
5478                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5479                             nfhp->nfh_len, iomode, off, UINT64_MAX,
5480                             (uint64_t)0, layouttype, layoutlen, &stateid,
5481                             &retonclose, &flh, cred, p, NULL);
5482                 }
5483                 error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5484                     nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5485                     &flh, layouttype, error, NULL, cred, p);
5486                 if (error == 0)
5487                         *lypp = lyp;
5488                 else if (islocked != 0)
5489                         nfscl_rellayout(lyp, 1);
5490         } else
5491                 *lypp = lyp;
5492         return (error);
5493 }
5494
5495 /*
5496  * Do a TCP connection plus exchange id and create session.
5497  * If successful, a "struct nfsclds" is linked into the list for the
5498  * mount point and a pointer to it is returned.
5499  */
5500 static int
5501 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
5502     struct sockaddr_in6 *sin6, sa_family_t af, int vers, int minorvers,
5503     struct nfsclds **dspp, NFSPROC_T *p)
5504 {
5505         struct sockaddr_in *msad, *sad;
5506         struct sockaddr_in6 *msad6, *sad6;
5507         struct nfsclclient *clp;
5508         struct nfssockreq *nrp;
5509         struct nfsclds *dsp, *tdsp;
5510         int error, firsttry;
5511         enum nfsclds_state retv;
5512         uint32_t sequenceid = 0;
5513
5514         KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5515             ("nfsrpc_fillsa: NULL nr_cred"));
5516         NFSLOCKCLSTATE();
5517         clp = nmp->nm_clp;
5518         NFSUNLOCKCLSTATE();
5519         if (clp == NULL)
5520                 return (EPERM);
5521         if (af == AF_INET) {
5522                 NFSLOCKMNT(nmp);
5523                 /*
5524                  * Check to see if we already have a session for this
5525                  * address that is usable for a DS.
5526                  * Note that the MDS's address is in a different place
5527                  * than the sessions already acquired for DS's.
5528                  */
5529                 msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5530                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
5531                 while (tdsp != NULL) {
5532                         if (msad != NULL && msad->sin_family == AF_INET &&
5533                             sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
5534                             sin->sin_port == msad->sin_port &&
5535                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5536                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
5537                                 *dspp = tdsp;
5538                                 NFSUNLOCKMNT(nmp);
5539                                 NFSCL_DEBUG(4, "fnd same addr\n");
5540                                 return (0);
5541                         }
5542                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5543                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5544                                 msad = (struct sockaddr_in *)
5545                                     tdsp->nfsclds_sockp->nr_nam;
5546                         else
5547                                 msad = NULL;
5548                 }
5549                 NFSUNLOCKMNT(nmp);
5550
5551                 /* No IP address match, so look for new/trunked one. */
5552                 sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
5553                 sad->sin_len = sizeof(*sad);
5554                 sad->sin_family = AF_INET;
5555                 sad->sin_port = sin->sin_port;
5556                 sad->sin_addr.s_addr = sin->sin_addr.s_addr;
5557                 nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5558                 nrp->nr_nam = (struct sockaddr *)sad;
5559         } else if (af == AF_INET6) {
5560                 NFSLOCKMNT(nmp);
5561                 /*
5562                  * Check to see if we already have a session for this
5563                  * address that is usable for a DS.
5564                  * Note that the MDS's address is in a different place
5565                  * than the sessions already acquired for DS's.
5566                  */
5567                 msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
5568                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
5569                 while (tdsp != NULL) {
5570                         if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
5571                             IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
5572                             &msad6->sin6_addr) &&
5573                             sin6->sin6_port == msad6->sin6_port &&
5574                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5575                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
5576                                 *dspp = tdsp;
5577                                 NFSUNLOCKMNT(nmp);
5578                                 return (0);
5579                         }
5580                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5581                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5582                                 msad6 = (struct sockaddr_in6 *)
5583                                     tdsp->nfsclds_sockp->nr_nam;
5584                         else
5585                                 msad6 = NULL;
5586                 }
5587                 NFSUNLOCKMNT(nmp);
5588
5589                 /* No IP address match, so look for new/trunked one. */
5590                 sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
5591                 sad6->sin6_len = sizeof(*sad6);
5592                 sad6->sin6_family = AF_INET6;
5593                 sad6->sin6_port = sin6->sin6_port;
5594                 NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
5595                     sizeof(struct in6_addr));
5596                 nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5597                 nrp->nr_nam = (struct sockaddr *)sad6;
5598         } else
5599                 return (EPERM);
5600
5601         nrp->nr_sotype = SOCK_STREAM;
5602         mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
5603         nrp->nr_prog = NFS_PROG;
5604         nrp->nr_vers = vers;
5605
5606         /*
5607          * Use the credentials that were used for the mount, which are
5608          * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
5609          * Ref. counting the credentials with crhold() is probably not
5610          * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
5611          * unmount, but I did it anyhow.
5612          */
5613         nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
5614         error = newnfs_connect(nmp, nrp, NULL, p, 0, false);
5615         NFSCL_DEBUG(3, "DS connect=%d\n", error);
5616
5617         dsp = NULL;
5618         /* Now, do the exchangeid and create session. */
5619         if (error == 0) {
5620                 if (vers == NFS_VER4) {
5621                         firsttry = 0;
5622                         do {
5623                                 error = nfsrpc_exchangeid(nmp, clp, nrp, 
5624                                     minorvers, NFSV4EXCH_USEPNFSDS, &dsp,
5625                                     nrp->nr_cred, p);
5626                                 NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
5627                                 if (error == NFSERR_MINORVERMISMATCH)
5628                                         minorvers = NFSV42_MINORVERSION;
5629                         } while (error == NFSERR_MINORVERMISMATCH &&
5630                             firsttry++ == 0);
5631                         if (error != 0)
5632                                 newnfs_disconnect(nrp);
5633                 } else {
5634                         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
5635                             M_WAITOK | M_ZERO);
5636                         dsp->nfsclds_flags |= NFSCLDS_DS;
5637                         dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
5638                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5639                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5640                             NULL, MTX_DEF);
5641                 }
5642         }
5643         if (error == 0) {
5644                 dsp->nfsclds_sockp = nrp;
5645                 if (vers == NFS_VER4) {
5646                         NFSLOCKMNT(nmp);
5647                         retv = nfscl_getsameserver(nmp, dsp, &tdsp,
5648                             &sequenceid);
5649                         NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
5650                         if (retv == NFSDSP_USETHISSESSION &&
5651                             nfscl_dssameconn != 0) {
5652                                 NFSLOCKDS(tdsp);
5653                                 tdsp->nfsclds_flags |= NFSCLDS_SAMECONN;
5654                                 NFSUNLOCKDS(tdsp);
5655                                 NFSUNLOCKMNT(nmp);
5656                                 /*
5657                                  * If there is already a session for this
5658                                  * server, use it.
5659                                  */
5660                                 (void)newnfs_disconnect(nrp);
5661                                 nfscl_freenfsclds(dsp);
5662                                 *dspp = tdsp;
5663                                 return (0);
5664                         }
5665                         if (retv == NFSDSP_NOTFOUND)
5666                                 sequenceid =
5667                                     dsp->nfsclds_sess.nfsess_sequenceid;
5668                         NFSUNLOCKMNT(nmp);
5669                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
5670                             nrp, dsp, sequenceid, 0, nrp->nr_cred, p);
5671                         NFSCL_DEBUG(3, "DS createsess=%d\n", error);
5672                 }
5673         } else {
5674                 NFSFREECRED(nrp->nr_cred);
5675                 NFSFREEMUTEX(&nrp->nr_mtx);
5676                 free(nrp->nr_nam, M_SONAME);
5677                 free(nrp, M_NFSSOCKREQ);
5678         }
5679         if (error == 0) {
5680                 NFSCL_DEBUG(3, "add DS session\n");
5681                 /*
5682                  * Put it at the end of the list. That way the list
5683                  * is ordered by when the entry was added. This matters
5684                  * since the one done first is the one that should be
5685                  * used for sequencid'ing any subsequent create sessions.
5686                  */
5687                 NFSLOCKMNT(nmp);
5688                 TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
5689                 NFSUNLOCKMNT(nmp);
5690                 *dspp = dsp;
5691         } else if (dsp != NULL) {
5692                 newnfs_disconnect(nrp);
5693                 nfscl_freenfsclds(dsp);
5694         }
5695         return (error);
5696 }
5697
5698 /*
5699  * Do the NFSv4.1 Reclaim Complete.
5700  */
5701 int
5702 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
5703 {
5704         uint32_t *tl;
5705         struct nfsrv_descript nfsd;
5706         struct nfsrv_descript *nd = &nfsd;
5707         int error;
5708
5709         nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0,
5710             0);
5711         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5712         *tl = newnfs_false;
5713         nd->nd_flag |= ND_USEGSSNAME;
5714         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5715             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5716         if (error != 0)
5717                 return (error);
5718         error = nd->nd_repstat;
5719         m_freem(nd->nd_mrep);
5720         return (error);
5721 }
5722
5723 /*
5724  * Initialize the slot tables for a session.
5725  */
5726 static void
5727 nfscl_initsessionslots(struct nfsclsession *sep)
5728 {
5729         int i;
5730
5731         for (i = 0; i < NFSV4_CBSLOTS; i++) {
5732                 if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
5733                         m_freem(sep->nfsess_cbslots[i].nfssl_reply);
5734                 NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
5735         }
5736         for (i = 0; i < 64; i++)
5737                 sep->nfsess_slotseq[i] = 0;
5738         sep->nfsess_slots = 0;
5739 }
5740
5741 /*
5742  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
5743  */
5744 int
5745 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5746     uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
5747 {
5748         struct nfsnode *np = VTONFS(vp);
5749         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5750         struct nfscllayout *layp;
5751         struct nfscldevinfo *dip;
5752         struct nfsclflayout *rflp;
5753         struct mbuf *m, *m2;
5754         struct nfsclwritedsdorpc *drpc, *tdrpc;
5755         nfsv4stateid_t stateid;
5756         struct ucred *newcred;
5757         uint64_t lastbyte, len, off, oresid, xfer;
5758         int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo;
5759         void *lckp;
5760         uint8_t *dev;
5761         void *iovbase = NULL;
5762         size_t iovlen = 0;
5763         off_t offs = 0;
5764         ssize_t resid = 0;
5765
5766         if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5767             (np->n_flag & NNOLAYOUT) != 0)
5768                 return (EIO);
5769         /* Now, get a reference cnt on the clientid for this mount. */
5770         if (nfscl_getref(nmp) == 0)
5771                 return (EIO);
5772
5773         /* Find an appropriate stateid. */
5774         newcred = NFSNEWCRED(cred);
5775         error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
5776             rwaccess, 1, newcred, p, &stateid, &lckp);
5777         if (error != 0) {
5778                 NFSFREECRED(newcred);
5779                 nfscl_relref(nmp);
5780                 return (error);
5781         }
5782         /* Search for a layout for this file. */
5783         off = uiop->uio_offset;
5784         layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
5785             np->n_fhp->nfh_len, off, &rflp, &recalled);
5786         if (layp == NULL || rflp == NULL) {
5787                 if (recalled != 0) {
5788                         NFSFREECRED(newcred);
5789                         nfscl_relref(nmp);
5790                         return (EIO);
5791                 }
5792                 if (layp != NULL) {
5793                         nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
5794                         layp = NULL;
5795                 }
5796                 /* Try and get a Layout, if it is supported. */
5797                 if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
5798                     (np->n_flag & NWRITEOPENED) != 0)
5799                         iolaymode = NFSLAYOUTIOMODE_RW;
5800                 else
5801                         iolaymode = NFSLAYOUTIOMODE_READ;
5802                 error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
5803                     NULL, &stateid, off, &layp, newcred, p);
5804                 if (error != 0) {
5805                         NFSLOCKNODE(np);
5806                         np->n_flag |= NNOLAYOUT;
5807                         NFSUNLOCKNODE(np);
5808                         if (lckp != NULL)
5809                                 nfscl_lockderef(lckp);
5810                         NFSFREECRED(newcred);
5811                         if (layp != NULL)
5812                                 nfscl_rellayout(layp, 0);
5813                         nfscl_relref(nmp);
5814                         return (error);
5815                 }
5816         }
5817
5818         /*
5819          * Loop around finding a layout that works for the first part of
5820          * this I/O operation, and then call the function that actually
5821          * does the RPC.
5822          */
5823         eof = 0;
5824         len = (uint64_t)uiop->uio_resid;
5825         while (len > 0 && error == 0 && eof == 0) {
5826                 off = uiop->uio_offset;
5827                 error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
5828                 if (error == 0) {
5829                         oresid = xfer = (uint64_t)uiop->uio_resid;
5830                         if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
5831                                 xfer = rflp->nfsfl_end - rflp->nfsfl_off;
5832                         /*
5833                          * For Flex File layout with mirrored DSs, select one
5834                          * of them at random for reads. For writes and commits,
5835                          * do all mirrors.
5836                          */
5837                         m = NULL;
5838                         tdrpc = drpc = NULL;
5839                         firstmirror = 0;
5840                         mirrorcnt = 1;
5841                         if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
5842                             (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
5843                                 if (rwaccess == NFSV4OPEN_ACCESSREAD) {
5844                                         firstmirror = arc4random() % mirrorcnt;
5845                                         mirrorcnt = firstmirror + 1;
5846                                 } else {
5847                                         if (docommit == 0) {
5848                                                 /*
5849                                                  * Save values, so uiop can be
5850                                                  * rolled back upon a write
5851                                                  * error.
5852                                                  */
5853                                                 offs = uiop->uio_offset;
5854                                                 resid = uiop->uio_resid;
5855                                                 iovbase =
5856                                                     uiop->uio_iov->iov_base;
5857                                                 iovlen = uiop->uio_iov->iov_len;
5858                                                 m = nfsm_uiombuflist(uiop, len,
5859                                                     0);
5860                                         }
5861                                         tdrpc = drpc = malloc(sizeof(*drpc) *
5862                                             (mirrorcnt - 1), M_TEMP, M_WAITOK |
5863                                             M_ZERO);
5864                                 }
5865                         }
5866                         for (i = firstmirror; i < mirrorcnt && error == 0; i++){
5867                                 m2 = NULL;
5868                                 if (m != NULL && i < mirrorcnt - 1)
5869                                         m2 = m_copym(m, 0, M_COPYALL, M_WAITOK);
5870                                 else {
5871                                         m2 = m;
5872                                         m = NULL;
5873                                 }
5874                                 if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0) {
5875                                         dev = rflp->nfsfl_ffm[i].dev;
5876                                         dip = nfscl_getdevinfo(nmp->nm_clp, dev,
5877                                             rflp->nfsfl_ffm[i].devp);
5878                                 } else {
5879                                         dev = rflp->nfsfl_dev;
5880                                         dip = nfscl_getdevinfo(nmp->nm_clp, dev,
5881                                             rflp->nfsfl_devp);
5882                                 }
5883                                 if (dip != NULL) {
5884                                         if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
5885                                             != 0)
5886                                                 error = nfscl_dofflayoutio(vp,
5887                                                     uiop, iomode, must_commit,
5888                                                     &eof, &stateid, rwaccess,
5889                                                     dip, layp, rflp, off, xfer,
5890                                                     i, docommit, m2, tdrpc,
5891                                                     newcred, p);
5892                                         else
5893                                                 error = nfscl_doflayoutio(vp,
5894                                                     uiop, iomode, must_commit,
5895                                                     &eof, &stateid, rwaccess,
5896                                                     dip, layp, rflp, off, xfer,
5897                                                     docommit, newcred, p);
5898                                         nfscl_reldevinfo(dip);
5899                                 } else {
5900                                         if (m2 != NULL)
5901                                                 m_freem(m2);
5902                                         error = EIO;
5903                                 }
5904                                 tdrpc++;
5905                         }
5906                         if (m != NULL)
5907                                 m_freem(m);
5908                         tdrpc = drpc;
5909                         timo = hz / 50;         /* Wait for 20msec. */
5910                         if (timo < 1)
5911                                 timo = 1;
5912                         for (i = firstmirror; i < mirrorcnt - 1 &&
5913                             tdrpc != NULL; i++, tdrpc++) {
5914                                 /*
5915                                  * For the unused drpc entries, both inprog and
5916                                  * err == 0, so this loop won't break.
5917                                  */
5918                                 while (tdrpc->inprog != 0 && tdrpc->done == 0)
5919                                         tsleep(&tdrpc->tsk, PVFS, "clrpcio",
5920                                             timo);
5921                                 if (error == 0 && tdrpc->err != 0)
5922                                         error = tdrpc->err;
5923                         }
5924                         free(drpc, M_TEMP);
5925                         if (error == 0) {
5926                                 if (mirrorcnt > 1 && rwaccess ==
5927                                     NFSV4OPEN_ACCESSWRITE && docommit == 0) {
5928                                         NFSLOCKCLSTATE();
5929                                         layp->nfsly_flags |= NFSLY_WRITTEN;
5930                                         NFSUNLOCKCLSTATE();
5931                                 }
5932                                 lastbyte = off + xfer - 1;
5933                                 NFSLOCKCLSTATE();
5934                                 if (lastbyte > layp->nfsly_lastbyte)
5935                                         layp->nfsly_lastbyte = lastbyte;
5936                                 NFSUNLOCKCLSTATE();
5937                         } else if (error == NFSERR_OPENMODE &&
5938                             rwaccess == NFSV4OPEN_ACCESSREAD) {
5939                                 NFSLOCKMNT(nmp);
5940                                 nmp->nm_state |= NFSSTA_OPENMODE;
5941                                 NFSUNLOCKMNT(nmp);
5942                         } else
5943                                 error = EIO;
5944                         if (error == 0)
5945                                 len -= (oresid - (uint64_t)uiop->uio_resid);
5946                         else if (mirrorcnt > 1 && rwaccess ==
5947                             NFSV4OPEN_ACCESSWRITE && docommit == 0) {
5948                                 /*
5949                                  * In case the rpc gets retried, roll the
5950                                  * uio fields changed by nfsm_uiombuflist()
5951                                  * back.
5952                                  */
5953                                 uiop->uio_offset = offs;
5954                                 uiop->uio_resid = resid;
5955                                 uiop->uio_iov->iov_base = iovbase;
5956                                 uiop->uio_iov->iov_len = iovlen;
5957                         }
5958                 }
5959         }
5960         if (lckp != NULL)
5961                 nfscl_lockderef(lckp);
5962         NFSFREECRED(newcred);
5963         nfscl_rellayout(layp, 0);
5964         nfscl_relref(nmp);
5965         return (error);
5966 }
5967
5968 /*
5969  * Find a file layout that will handle the first bytes of the requested
5970  * range and return the information from it needed to the I/O operation.
5971  */
5972 int
5973 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
5974     struct nfsclflayout **retflpp)
5975 {
5976         struct nfsclflayout *flp, *nflp, *rflp;
5977         uint32_t rw;
5978
5979         rflp = NULL;
5980         rw = rwaccess;
5981         /* For reading, do the Read list first and then the Write list. */
5982         do {
5983                 if (rw == NFSV4OPEN_ACCESSREAD)
5984                         flp = LIST_FIRST(&lyp->nfsly_flayread);
5985                 else
5986                         flp = LIST_FIRST(&lyp->nfsly_flayrw);
5987                 while (flp != NULL) {
5988                         nflp = LIST_NEXT(flp, nfsfl_list);
5989                         if (flp->nfsfl_off > off)
5990                                 break;
5991                         if (flp->nfsfl_end > off &&
5992                             (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
5993                                 rflp = flp;
5994                         flp = nflp;
5995                 }
5996                 if (rw == NFSV4OPEN_ACCESSREAD)
5997                         rw = NFSV4OPEN_ACCESSWRITE;
5998                 else
5999                         rw = 0;
6000         } while (rw != 0);
6001         if (rflp != NULL) {
6002                 /* This one covers the most bytes starting at off. */
6003                 *retflpp = rflp;
6004                 return (0);
6005         }
6006         return (EIO);
6007 }
6008
6009 /*
6010  * Do I/O using an NFSv4.1 or NFSv4.2 file layout.
6011  */
6012 static int
6013 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6014     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6015     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6016     uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
6017 {
6018         uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
6019         int commit_thru_mds, error, stripe_index, stripe_pos, minorvers;
6020         struct nfsnode *np;
6021         struct nfsfh *fhp;
6022         struct nfsclds **dspp;
6023
6024         np = VTONFS(vp);
6025         rel_off = off - flp->nfsfl_patoff;
6026         stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff;
6027         stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
6028             dp->nfsdi_stripecnt;
6029         transfer = stripe_unit_size - (rel_off % stripe_unit_size);
6030         error = 0;
6031
6032         /* Loop around, doing I/O for each stripe unit. */
6033         while (len > 0 && error == 0) {
6034                 stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
6035                 dspp = nfsfldi_addr(dp, stripe_index);
6036                 if (((*dspp)->nfsclds_flags & NFSCLDS_MINORV2) != 0)
6037                         minorvers = NFSV42_MINORVERSION;
6038                 else
6039                         minorvers = NFSV41_MINORVERSION;
6040                 if (len > transfer && docommit == 0)
6041                         xfer = transfer;
6042                 else
6043                         xfer = len;
6044                 if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
6045                         /* Dense layout. */
6046                         if (stripe_pos >= flp->nfsfl_fhcnt)
6047                                 return (EIO);
6048                         fhp = flp->nfsfl_fh[stripe_pos];
6049                         io_off = (rel_off / (stripe_unit_size *
6050                             dp->nfsdi_stripecnt)) * stripe_unit_size +
6051                             rel_off % stripe_unit_size;
6052                 } else {
6053                         /* Sparse layout. */
6054                         if (flp->nfsfl_fhcnt > 1) {
6055                                 if (stripe_index >= flp->nfsfl_fhcnt)
6056                                         return (EIO);
6057                                 fhp = flp->nfsfl_fh[stripe_index];
6058                         } else if (flp->nfsfl_fhcnt == 1)
6059                                 fhp = flp->nfsfl_fh[0];
6060                         else
6061                                 fhp = np->n_fhp;
6062                         io_off = off;
6063                 }
6064                 if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
6065                         commit_thru_mds = 1;
6066                         if (docommit != 0)
6067                                 error = EIO;
6068                 } else {
6069                         commit_thru_mds = 0;
6070                         NFSLOCKNODE(np);
6071                         np->n_flag |= NDSCOMMIT;
6072                         NFSUNLOCKNODE(np);
6073                 }
6074                 if (docommit != 0) {
6075                         if (error == 0)
6076                                 error = nfsrpc_commitds(vp, io_off, xfer,
6077                                     *dspp, fhp, NFS_VER4, minorvers, cred, p);
6078                         if (error == 0) {
6079                                 /*
6080                                  * Set both eof and uio_resid = 0 to end any
6081                                  * loops.
6082                                  */
6083                                 *eofp = 1;
6084                                 uiop->uio_resid = 0;
6085                         } else {
6086                                 NFSLOCKNODE(np);
6087                                 np->n_flag &= ~NDSCOMMIT;
6088                                 NFSUNLOCKNODE(np);
6089                         }
6090                 } else if (rwflag == NFSV4OPEN_ACCESSREAD)
6091                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6092                             io_off, xfer, fhp, 0, NFS_VER4, minorvers, cred, p);
6093                 else {
6094                         error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
6095                             stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
6096                             0, NFS_VER4, minorvers, cred, p);
6097                         if (error == 0) {
6098                                 NFSLOCKCLSTATE();
6099                                 lyp->nfsly_flags |= NFSLY_WRITTEN;
6100                                 NFSUNLOCKCLSTATE();
6101                         }
6102                 }
6103                 if (error == 0) {
6104                         transfer = stripe_unit_size;
6105                         stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
6106                         len -= xfer;
6107                         off += xfer;
6108                 }
6109         }
6110         return (error);
6111 }
6112
6113 /*
6114  * Do I/O using an NFSv4.1 flex file layout.
6115  */
6116 static int
6117 nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6118     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6119     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6120     uint64_t len, int mirror, int docommit, struct mbuf *mp,
6121     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6122 {
6123         uint64_t xfer;
6124         int error;
6125         struct nfsnode *np;
6126         struct nfsfh *fhp;
6127         struct nfsclds **dspp;
6128         struct ucred *tcred;
6129         struct mbuf *m, *m2;
6130         uint32_t copylen;
6131
6132         np = VTONFS(vp);
6133         error = 0;
6134         NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
6135             (uintmax_t)len);
6136         /* Loop around, doing I/O for each stripe unit. */
6137         while (len > 0 && error == 0) {
6138                 dspp = nfsfldi_addr(dp, 0);
6139                 fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
6140                 stateidp = &flp->nfsfl_ffm[mirror].st;
6141                 NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
6142                     mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
6143                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
6144                         tcred = NFSNEWCRED(cred);
6145                         tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
6146                         tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
6147                         tcred->cr_ngroups = 1;
6148                 } else
6149                         tcred = cred;
6150                 if (rwflag == NFSV4OPEN_ACCESSREAD)
6151                         copylen = dp->nfsdi_rsize;
6152                 else {
6153                         copylen = dp->nfsdi_wsize;
6154                         if (len > copylen && mp != NULL) {
6155                                 /*
6156                                  * When a mirrored configuration needs to do
6157                                  * multiple writes to each mirror, all writes
6158                                  * except the last one must be a multiple of
6159                                  * 4 bytes.  This is required so that the XDR
6160                                  * does not need padding.
6161                                  * If possible, clip the size to an exact
6162                                  * multiple of the mbuf length, so that the
6163                                  * split will be on an mbuf boundary.
6164                                  */
6165                                 copylen &= 0xfffffffc;
6166                                 if (copylen > mp->m_len)
6167                                         copylen = copylen / mp->m_len *
6168                                             mp->m_len;
6169                         }
6170                 }
6171                 NFSLOCKNODE(np);
6172                 np->n_flag |= NDSCOMMIT;
6173                 NFSUNLOCKNODE(np);
6174                 if (len > copylen && docommit == 0)
6175                         xfer = copylen;
6176                 else
6177                         xfer = len;
6178                 if (docommit != 0) {
6179                         if (error == 0) {
6180                                 /*
6181                                  * Do last mirrored DS commit with this thread.
6182                                  */
6183                                 if (mirror < flp->nfsfl_mirrorcnt - 1)
6184                                         error = nfsio_commitds(vp, off, xfer,
6185                                             *dspp, fhp, dp->nfsdi_vers,
6186                                             dp->nfsdi_minorvers, drpc, tcred,
6187                                             p);
6188                                 else
6189                                         error = nfsrpc_commitds(vp, off, xfer,
6190                                             *dspp, fhp, dp->nfsdi_vers,
6191                                             dp->nfsdi_minorvers, tcred, p);
6192                                 NFSCL_DEBUG(4, "commitds=%d\n", error);
6193                                 if (error != 0 && error != EACCES && error !=
6194                                     ESTALE) {
6195                                         NFSCL_DEBUG(4,
6196                                             "DS layreterr for commit\n");
6197                                         nfscl_dserr(NFSV4OP_COMMIT, error, dp,
6198                                             lyp, *dspp);
6199                                 }
6200                         }
6201                         NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error);
6202                         if (error == 0) {
6203                                 /*
6204                                  * Set both eof and uio_resid = 0 to end any
6205                                  * loops.
6206                                  */
6207                                 *eofp = 1;
6208                                 uiop->uio_resid = 0;
6209                         } else {
6210                                 NFSLOCKNODE(np);
6211                                 np->n_flag &= ~NDSCOMMIT;
6212                                 NFSUNLOCKNODE(np);
6213                         }
6214                 } else if (rwflag == NFSV4OPEN_ACCESSREAD) {
6215                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6216                             off, xfer, fhp, 1, dp->nfsdi_vers,
6217                             dp->nfsdi_minorvers, tcred, p);
6218                         NFSCL_DEBUG(4, "readds=%d\n", error);
6219                         if (error != 0 && error != EACCES && error != ESTALE) {
6220                                 NFSCL_DEBUG(4, "DS layreterr for read\n");
6221                                 nfscl_dserr(NFSV4OP_READ, error, dp, lyp,
6222                                     *dspp);
6223                         }
6224                 } else {
6225                         if (flp->nfsfl_mirrorcnt == 1) {
6226                                 error = nfsrpc_writeds(vp, uiop, iomode,
6227                                     must_commit, stateidp, *dspp, off, xfer,
6228                                     fhp, 0, 1, dp->nfsdi_vers,
6229                                     dp->nfsdi_minorvers, tcred, p);
6230                                 if (error == 0) {
6231                                         NFSLOCKCLSTATE();
6232                                         lyp->nfsly_flags |= NFSLY_WRITTEN;
6233                                         NFSUNLOCKCLSTATE();
6234                                 }
6235                         } else {
6236                                 m = mp;
6237                                 if (xfer < len) {
6238                                         /* The mbuf list must be split. */
6239                                         m2 = nfsm_split(mp, xfer);
6240                                         if (m2 != NULL)
6241                                                 mp = m2;
6242                                         else {
6243                                                 m_freem(mp);
6244                                                 error = EIO;
6245                                         }
6246                                 }
6247                                 NFSCL_DEBUG(4, "mcopy len=%jd xfer=%jd\n",
6248                                     (uintmax_t)len, (uintmax_t)xfer);
6249                                 /*
6250                                  * Do last write to a mirrored DS with this
6251                                  * thread.
6252                                  */
6253                                 if (error == 0) {
6254                                         if (mirror < flp->nfsfl_mirrorcnt - 1)
6255                                                 error = nfsio_writedsmir(vp,
6256                                                     iomode, must_commit,
6257                                                     stateidp, *dspp, off,
6258                                                     xfer, fhp, m,
6259                                                     dp->nfsdi_vers,
6260                                                     dp->nfsdi_minorvers, drpc,
6261                                                     tcred, p);
6262                                         else
6263                                                 error = nfsrpc_writedsmir(vp,
6264                                                     iomode, must_commit,
6265                                                     stateidp, *dspp, off,
6266                                                     xfer, fhp, m,
6267                                                     dp->nfsdi_vers,
6268                                                     dp->nfsdi_minorvers, tcred,
6269                                                     p);
6270                                 }
6271                                 NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error);
6272                                 if (error != 0 && error != EACCES && error !=
6273                                     ESTALE) {
6274                                         NFSCL_DEBUG(4,
6275                                             "DS layreterr for write\n");
6276                                         nfscl_dserr(NFSV4OP_WRITE, error, dp,
6277                                             lyp, *dspp);
6278                                 }
6279                         }
6280                 }
6281                 NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
6282                 if (error == 0) {
6283                         len -= xfer;
6284                         off += xfer;
6285                 }
6286                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
6287                         NFSFREECRED(tcred);
6288         }
6289         NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
6290         return (error);
6291 }
6292
6293 /*
6294  * The actual read RPC done to a DS.
6295  */
6296 static int
6297 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
6298     struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
6299     int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
6300 {
6301         uint32_t *tl;
6302         int attrflag, error, retlen;
6303         struct nfsrv_descript nfsd;
6304         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6305         struct nfsrv_descript *nd = &nfsd;
6306         struct nfssockreq *nrp;
6307         struct nfsvattr na;
6308
6309         nd->nd_mrep = NULL;
6310         if (vers == 0 || vers == NFS_VER4) {
6311                 nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
6312                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6313                 vers = NFS_VER4;
6314                 NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
6315                 if (flex != 0)
6316                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6317                 else
6318                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6319         } else {
6320                 nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
6321                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6322                 NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
6323         }
6324         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
6325         txdr_hyper(io_off, tl);
6326         *(tl + 2) = txdr_unsigned(len);
6327         nrp = dsp->nfsclds_sockp;
6328         NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
6329         if (nrp == NULL)
6330                 /* If NULL, use the MDS socket. */
6331                 nrp = &nmp->nm_sockreq;
6332         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6333             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6334         NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
6335             error);
6336         if (error != 0)
6337                 return (error);
6338         if (vers == NFS_VER3) {
6339                 error = nfscl_postop_attr(nd, &na, &attrflag, NULL);
6340                 NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
6341                 if (error != 0)
6342                         goto nfsmout;
6343         }
6344         if (nd->nd_repstat != 0) {
6345                 error = nd->nd_repstat;
6346                 goto nfsmout;
6347         }
6348         if (vers == NFS_VER3) {
6349                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6350                 *eofp = fxdr_unsigned(int, *(tl + 1));
6351         } else {
6352                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6353                 *eofp = fxdr_unsigned(int, *tl);
6354         }
6355         NFSM_STRSIZ(retlen, len);
6356         NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
6357         error = nfsm_mbufuio(nd, uiop, retlen);
6358 nfsmout:
6359         if (nd->nd_mrep != NULL)
6360                 m_freem(nd->nd_mrep);
6361         return (error);
6362 }
6363
6364 /*
6365  * The actual write RPC done to a DS.
6366  */
6367 static int
6368 nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6369     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6370     struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
6371     struct ucred *cred, NFSPROC_T *p)
6372 {
6373         uint32_t *tl;
6374         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6375         int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
6376         int32_t backup;
6377         struct nfsrv_descript nfsd;
6378         struct nfsrv_descript *nd = &nfsd;
6379         struct nfssockreq *nrp;
6380         struct nfsvattr na;
6381
6382         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
6383         nd->nd_mrep = NULL;
6384         if (vers == 0 || vers == NFS_VER4) {
6385                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6386                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6387                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
6388                 vers = NFS_VER4;
6389                 if (flex != 0)
6390                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6391                 else
6392                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6393                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6394         } else {
6395                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6396                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6397                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
6398                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6399         }
6400         txdr_hyper(io_off, tl);
6401         tl += 2;
6402         if (vers == NFS_VER3)
6403                 *tl++ = txdr_unsigned(len);
6404         *tl++ = txdr_unsigned(*iomode);
6405         *tl = txdr_unsigned(len);
6406         nfsm_uiombuf(nd, uiop, len);
6407         nrp = dsp->nfsclds_sockp;
6408         if (nrp == NULL)
6409                 /* If NULL, use the MDS socket. */
6410                 nrp = &nmp->nm_sockreq;
6411         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6412             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6413         NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
6414             nd->nd_repstat);
6415         if (error != 0)
6416                 return (error);
6417         if (nd->nd_repstat != 0) {
6418                 /*
6419                  * In case the rpc gets retried, roll
6420                  * the uio fileds changed by nfsm_uiombuf()
6421                  * back.
6422                  */
6423                 uiop->uio_offset -= len;
6424                 uiop->uio_resid += len;
6425                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - len;
6426                 uiop->uio_iov->iov_len += len;
6427                 error = nd->nd_repstat;
6428         } else {
6429                 if (vers == NFS_VER3) {
6430                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6431                             NULL);
6432                         NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
6433                         if (error != 0)
6434                                 goto nfsmout;
6435                 }
6436                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6437                 rlen = fxdr_unsigned(int, *tl++);
6438                 NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
6439                 if (rlen == 0) {
6440                         error = NFSERR_IO;
6441                         goto nfsmout;
6442                 } else if (rlen < len) {
6443                         backup = len - rlen;
6444                         uiop->uio_iov->iov_base =
6445                             (char *)uiop->uio_iov->iov_base - backup;
6446                         uiop->uio_iov->iov_len += backup;
6447                         uiop->uio_offset -= backup;
6448                         uiop->uio_resid += backup;
6449                         len = rlen;
6450                 }
6451                 commit = fxdr_unsigned(int, *tl++);
6452
6453                 /*
6454                  * Return the lowest commitment level
6455                  * obtained by any of the RPCs.
6456                  */
6457                 if (committed == NFSWRITE_FILESYNC)
6458                         committed = commit;
6459                 else if (committed == NFSWRITE_DATASYNC &&
6460                     commit == NFSWRITE_UNSTABLE)
6461                         committed = commit;
6462                 if (commit_thru_mds != 0) {
6463                         NFSLOCKMNT(nmp);
6464                         if (!NFSHASWRITEVERF(nmp)) {
6465                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6466                                 NFSSETWRITEVERF(nmp);
6467                         } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
6468                                 *must_commit = 1;
6469                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6470                         }
6471                         NFSUNLOCKMNT(nmp);
6472                 } else {
6473                         NFSLOCKDS(dsp);
6474                         if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6475                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6476                                 dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6477                         } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6478                                 *must_commit = 1;
6479                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6480                         }
6481                         NFSUNLOCKDS(dsp);
6482                 }
6483         }
6484 nfsmout:
6485         if (nd->nd_mrep != NULL)
6486                 m_freem(nd->nd_mrep);
6487         *iomode = committed;
6488         if (nd->nd_repstat != 0 && error == 0)
6489                 error = nd->nd_repstat;
6490         return (error);
6491 }
6492
6493 /*
6494  * The actual write RPC done to a DS.
6495  * This variant is called from a separate kernel process for mirrors.
6496  * Any short write is considered an IO error.
6497  */
6498 static int
6499 nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6500     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6501     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6502     struct ucred *cred, NFSPROC_T *p)
6503 {
6504         uint32_t *tl;
6505         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6506         int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
6507         struct nfsrv_descript nfsd;
6508         struct nfsrv_descript *nd = &nfsd;
6509         struct nfssockreq *nrp;
6510         struct nfsvattr na;
6511
6512         nd->nd_mrep = NULL;
6513         if (vers == 0 || vers == NFS_VER4) {
6514                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6515                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6516                 vers = NFS_VER4;
6517                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
6518                     minorvers);
6519                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6520                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6521         } else {
6522                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6523                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6524                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
6525                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6526         }
6527         txdr_hyper(io_off, tl);
6528         tl += 2;
6529         if (vers == NFS_VER3)
6530                 *tl++ = txdr_unsigned(len);
6531         *tl++ = txdr_unsigned(*iomode);
6532         *tl = txdr_unsigned(len);
6533         if (len > 0) {
6534                 /* Put data in mbuf chain. */
6535                 nd->nd_mb->m_next = m;
6536         }
6537         nrp = dsp->nfsclds_sockp;
6538         if (nrp == NULL)
6539                 /* If NULL, use the MDS socket. */
6540                 nrp = &nmp->nm_sockreq;
6541         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6542             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6543         NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
6544             nd->nd_repstat);
6545         if (error != 0)
6546                 return (error);
6547         if (nd->nd_repstat != 0)
6548                 error = nd->nd_repstat;
6549         else {
6550                 if (vers == NFS_VER3) {
6551                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6552                             NULL);
6553                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
6554                             error);
6555                         if (error != 0)
6556                                 goto nfsmout;
6557                 }
6558                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6559                 rlen = fxdr_unsigned(int, *tl++);
6560                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
6561                     rlen);
6562                 if (rlen != len) {
6563                         error = NFSERR_IO;
6564                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
6565                             len, rlen);
6566                         goto nfsmout;
6567                 }
6568                 commit = fxdr_unsigned(int, *tl++);
6569
6570                 /*
6571                  * Return the lowest commitment level
6572                  * obtained by any of the RPCs.
6573                  */
6574                 if (committed == NFSWRITE_FILESYNC)
6575                         committed = commit;
6576                 else if (committed == NFSWRITE_DATASYNC &&
6577                     commit == NFSWRITE_UNSTABLE)
6578                         committed = commit;
6579                 NFSLOCKDS(dsp);
6580                 if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6581                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6582                         dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6583                 } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6584                         *must_commit = 1;
6585                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6586                 }
6587                 NFSUNLOCKDS(dsp);
6588         }
6589 nfsmout:
6590         if (nd->nd_mrep != NULL)
6591                 m_freem(nd->nd_mrep);
6592         *iomode = committed;
6593         if (nd->nd_repstat != 0 && error == 0)
6594                 error = nd->nd_repstat;
6595         return (error);
6596 }
6597
6598 /*
6599  * Start up the thread that will execute nfsrpc_writedsmir().
6600  */
6601 static void
6602 start_writedsmir(void *arg, int pending)
6603 {
6604         struct nfsclwritedsdorpc *drpc;
6605
6606         drpc = (struct nfsclwritedsdorpc *)arg;
6607         drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode,
6608             &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len,
6609             drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred,
6610             drpc->p);
6611         drpc->done = 1;
6612         NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err);
6613 }
6614
6615 /*
6616  * Set up the write DS mirror call for the pNFS I/O thread.
6617  */
6618 static int
6619 nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6620     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len,
6621     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6622     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6623 {
6624         int error, ret;
6625
6626         error = 0;
6627         drpc->done = 0;
6628         drpc->vp = vp;
6629         drpc->iomode = *iomode;
6630         drpc->must_commit = *must_commit;
6631         drpc->stateidp = stateidp;
6632         drpc->dsp = dsp;
6633         drpc->off = off;
6634         drpc->len = len;
6635         drpc->fhp = fhp;
6636         drpc->m = m;
6637         drpc->vers = vers;
6638         drpc->minorvers = minorvers;
6639         drpc->cred = cred;
6640         drpc->p = p;
6641         drpc->inprog = 0;
6642         ret = EIO;
6643         if (nfs_pnfsiothreads != 0) {
6644                 ret = nfs_pnfsio(start_writedsmir, drpc);
6645                 NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret);
6646         }
6647         if (ret != 0)
6648                 error = nfsrpc_writedsmir(vp, iomode, must_commit, stateidp,
6649                     dsp, off, len, fhp, m, vers, minorvers, cred, p);
6650         NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error);
6651         return (error);
6652 }
6653
6654 /*
6655  * Free up the nfsclds structure.
6656  */
6657 void
6658 nfscl_freenfsclds(struct nfsclds *dsp)
6659 {
6660         int i;
6661
6662         if (dsp == NULL)
6663                 return;
6664         if (dsp->nfsclds_sockp != NULL) {
6665                 NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
6666                 NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
6667                 free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
6668                 free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
6669         }
6670         NFSFREEMUTEX(&dsp->nfsclds_mtx);
6671         NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
6672         for (i = 0; i < NFSV4_CBSLOTS; i++) {
6673                 if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
6674                         m_freem(
6675                             dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
6676         }
6677         free(dsp, M_NFSCLDS);
6678 }
6679
6680 static enum nfsclds_state
6681 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
6682     struct nfsclds **retdspp, uint32_t *sequencep)
6683 {
6684         struct nfsclds *dsp;
6685         int fndseq;
6686
6687         /*
6688          * Search the list of nfsclds structures for one with the same
6689          * server.
6690          */
6691         fndseq = 0;
6692         TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
6693                 if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
6694                     dsp->nfsclds_servownlen != 0 &&
6695                     !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
6696                     dsp->nfsclds_servownlen) &&
6697                     dsp->nfsclds_sess.nfsess_defunct == 0) {
6698                         NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
6699                             TAILQ_FIRST(&nmp->nm_sess), dsp,
6700                             dsp->nfsclds_flags);
6701                         if (fndseq == 0) {
6702                                 /* Get sequenceid# from first entry. */
6703                                 *sequencep =
6704                                     dsp->nfsclds_sess.nfsess_sequenceid;
6705                                 fndseq = 1;
6706                         }
6707                         /* Server major id matches. */
6708                         if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
6709                                 *retdspp = dsp;
6710                                 return (NFSDSP_USETHISSESSION);
6711                         }
6712                 }
6713         }
6714         if (fndseq != 0)
6715                 return (NFSDSP_SEQTHISSESSION);
6716         return (NFSDSP_NOTFOUND);
6717 }
6718
6719 /*
6720  * NFS commit rpc to a NFSv4.1 DS.
6721  */
6722 static int
6723 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
6724     struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
6725     NFSPROC_T *p)
6726 {
6727         uint32_t *tl;
6728         struct nfsrv_descript nfsd, *nd = &nfsd;
6729         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6730         struct nfssockreq *nrp;
6731         struct nfsvattr na;
6732         int attrflag, error;
6733
6734         nd->nd_mrep = NULL;
6735         if (vers == 0 || vers == NFS_VER4) {
6736                 nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,
6737                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6738                 vers = NFS_VER4;
6739         } else
6740                 nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh,
6741                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6742         NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers,
6743             minorvers);
6744         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
6745         txdr_hyper(offset, tl);
6746         tl += 2;
6747         *tl = txdr_unsigned(cnt);
6748         nrp = dsp->nfsclds_sockp;
6749         if (nrp == NULL)
6750                 /* If NULL, use the MDS socket. */
6751                 nrp = &nmp->nm_sockreq;
6752         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6753             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6754         NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error,
6755             nd->nd_repstat);
6756         if (error != 0)
6757                 return (error);
6758         if (nd->nd_repstat == 0) {
6759                 if (vers == NFS_VER3) {
6760                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6761                             NULL);
6762                         NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error);
6763                         if (error != 0)
6764                                 goto nfsmout;
6765                 }
6766                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
6767                 NFSLOCKDS(dsp);
6768                 if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
6769                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6770                         error = NFSERR_STALEWRITEVERF;
6771                 }
6772                 NFSUNLOCKDS(dsp);
6773         }
6774 nfsmout:
6775         if (error == 0 && nd->nd_repstat != 0)
6776                 error = nd->nd_repstat;
6777         m_freem(nd->nd_mrep);
6778         return (error);
6779 }
6780
6781 /*
6782  * Start up the thread that will execute nfsrpc_commitds().
6783  */
6784 static void
6785 start_commitds(void *arg, int pending)
6786 {
6787         struct nfsclwritedsdorpc *drpc;
6788
6789         drpc = (struct nfsclwritedsdorpc *)arg;
6790         drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len,
6791             drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred,
6792             drpc->p);
6793         drpc->done = 1;
6794         NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err);
6795 }
6796
6797 /*
6798  * Set up the commit DS mirror call for the pNFS I/O thread.
6799  */
6800 static int
6801 nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
6802     struct nfsfh *fhp, int vers, int minorvers,
6803     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6804 {
6805         int error, ret;
6806
6807         error = 0;
6808         drpc->done = 0;
6809         drpc->vp = vp;
6810         drpc->off = offset;
6811         drpc->len = cnt;
6812         drpc->dsp = dsp;
6813         drpc->fhp = fhp;
6814         drpc->vers = vers;
6815         drpc->minorvers = minorvers;
6816         drpc->cred = cred;
6817         drpc->p = p;
6818         drpc->inprog = 0;
6819         ret = EIO;
6820         if (nfs_pnfsiothreads != 0) {
6821                 ret = nfs_pnfsio(start_commitds, drpc);
6822                 NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret);
6823         }
6824         if (ret != 0)
6825                 error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers,
6826                     minorvers, cred, p);
6827         NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error);
6828         return (error);
6829 }
6830
6831 /*
6832  * NFS Advise rpc
6833  */
6834 int
6835 nfsrpc_advise(vnode_t vp, off_t offset, uint64_t cnt, int advise,
6836     struct ucred *cred, NFSPROC_T *p)
6837 {
6838         u_int32_t *tl;
6839         struct nfsrv_descript nfsd, *nd = &nfsd;
6840         nfsattrbit_t hints;
6841         int error;
6842
6843         NFSZERO_ATTRBIT(&hints);
6844         if (advise == POSIX_FADV_WILLNEED)
6845                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
6846         else if (advise == POSIX_FADV_DONTNEED)
6847                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
6848         else
6849                 return (0);
6850         NFSCL_REQSTART(nd, NFSPROC_IOADVISE, vp);
6851         nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
6852         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
6853         txdr_hyper(offset, tl);
6854         tl += 2;
6855         txdr_hyper(cnt, tl);
6856         nfsrv_putattrbit(nd, &hints);
6857         error = nfscl_request(nd, vp, p, cred, NULL);
6858         if (error != 0)
6859                 return (error);
6860         if (nd->nd_repstat != 0)
6861                 error = nd->nd_repstat;
6862         m_freem(nd->nd_mrep);
6863         return (error);
6864 }
6865
6866 #ifdef notyet
6867 /*
6868  * NFS advise rpc to a NFSv4.2 DS.
6869  */
6870 static int
6871 nfsrpc_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
6872     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
6873     struct ucred *cred, NFSPROC_T *p)
6874 {
6875         uint32_t *tl;
6876         struct nfsrv_descript nfsd, *nd = &nfsd;
6877         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6878         struct nfssockreq *nrp;
6879         nfsattrbit_t hints;
6880         int error;
6881
6882         /* For NFS DSs prior to NFSv4.2, just return OK. */
6883         if (vers == NFS_VER3 || minorversion < NFSV42_MINORVERSION)
6884                 return (0);
6885         NFSZERO_ATTRBIT(&hints);
6886         if (advise == POSIX_FADV_WILLNEED)
6887                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
6888         else if (advise == POSIX_FADV_DONTNEED)
6889                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
6890         else
6891                 return (0);
6892         nd->nd_mrep = NULL;
6893         nfscl_reqstart(nd, NFSPROC_IOADVISEDS, nmp, fhp->nfh_fh,
6894             fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers);
6895         vers = NFS_VER4;
6896         NFSCL_DEBUG(4, "nfsrpc_adviseds: vers=%d minvers=%d\n", vers,
6897             minorvers);
6898         nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
6899         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
6900         txdr_hyper(offset, tl);
6901         tl += 2;
6902         *tl = txdr_unsigned(cnt);
6903         nfsrv_putattrbit(nd, &hints);
6904         nrp = dsp->nfsclds_sockp;
6905         if (nrp == NULL)
6906                 /* If NULL, use the MDS socket. */
6907                 nrp = &nmp->nm_sockreq;
6908         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6909             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6910         NFSCL_DEBUG(4, "nfsrpc_adviseds: err=%d stat=%d\n", error,
6911             nd->nd_repstat);
6912         if (error != 0)
6913                 return (error);
6914         if (nd->nd_repstat != 0)
6915                 error = nd->nd_repstat;
6916         m_freem(nd->nd_mrep);
6917         return (error);
6918 }
6919
6920 /*
6921  * Start up the thread that will execute nfsrpc_commitds().
6922  */
6923 static void
6924 start_adviseds(void *arg, int pending)
6925 {
6926         struct nfsclwritedsdorpc *drpc;
6927
6928         drpc = (struct nfsclwritedsdorpc *)arg;
6929         drpc->err = nfsrpc_adviseds(drpc->vp, drpc->off, drpc->len,
6930             drpc->advise, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers,
6931             drpc->cred, drpc->p);
6932         drpc->done = 1;
6933         NFSCL_DEBUG(4, "start_adviseds: err=%d\n", drpc->err);
6934 }
6935
6936 /*
6937  * Set up the commit DS mirror call for the pNFS I/O thread.
6938  */
6939 static int
6940 nfsio_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
6941     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
6942     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6943 {
6944         int error, ret;
6945
6946         error = 0;
6947         drpc->done = 0;
6948         drpc->vp = vp;
6949         drpc->off = offset;
6950         drpc->len = cnt;
6951         drpc->advise = advise;
6952         drpc->dsp = dsp;
6953         drpc->fhp = fhp;
6954         drpc->vers = vers;
6955         drpc->minorvers = minorvers;
6956         drpc->cred = cred;
6957         drpc->p = p;
6958         drpc->inprog = 0;
6959         ret = EIO;
6960         if (nfs_pnfsiothreads != 0) {
6961                 ret = nfs_pnfsio(start_adviseds, drpc);
6962                 NFSCL_DEBUG(4, "nfsio_adviseds: nfs_pnfsio=%d\n", ret);
6963         }
6964         if (ret != 0)
6965                 error = nfsrpc_adviseds(vp, offset, cnt, advise, dsp, fhp, vers,
6966                     minorvers, cred, p);
6967         NFSCL_DEBUG(4, "nfsio_adviseds: error=%d\n", error);
6968         return (error);
6969 }
6970 #endif  /* notyet */
6971
6972 /*
6973  * Do the Allocate operation, retrying for recovery.
6974  */
6975 int
6976 nfsrpc_allocate(vnode_t vp, off_t off, off_t len, struct nfsvattr *nap,
6977     int *attrflagp, struct ucred *cred, NFSPROC_T *p, void *stuff)
6978 {
6979         int error, expireret = 0, retrycnt, nostateid;
6980         uint32_t clidrev = 0;
6981         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6982         struct nfsfh *nfhp = NULL;
6983         nfsv4stateid_t stateid;
6984         off_t tmp_off;
6985         void *lckp;
6986
6987         if (len < 0)
6988                 return (EINVAL);
6989         if (len == 0)
6990                 return (0);
6991         tmp_off = off + len;
6992         NFSLOCKMNT(nmp);
6993         if (tmp_off > nmp->nm_maxfilesize || tmp_off < off) {
6994                 NFSUNLOCKMNT(nmp);
6995                 return (EFBIG);
6996         }
6997         if (nmp->nm_clp != NULL)
6998                 clidrev = nmp->nm_clp->nfsc_clientidrev;
6999         NFSUNLOCKMNT(nmp);
7000         nfhp = VTONFS(vp)->n_fhp;
7001         retrycnt = 0;
7002         do {
7003                 lckp = NULL;
7004                 nostateid = 0;
7005                 nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
7006                     NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
7007                 if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
7008                     stateid.other[2] == 0) {
7009                         nostateid = 1;
7010                         NFSCL_DEBUG(1, "stateid0 in allocate\n");
7011                 }
7012
7013                 /*
7014                  * Not finding a stateid should probably never happen,
7015                  * but just return an error for this case.
7016                  */
7017                 if (nostateid != 0)
7018                         error = EIO;
7019                 else
7020                         error = nfsrpc_allocaterpc(vp, off, len, &stateid,
7021                             nap, attrflagp, cred, p, stuff);
7022                 if (error == NFSERR_STALESTATEID)
7023                         nfscl_initiate_recovery(nmp->nm_clp);
7024                 if (lckp != NULL)
7025                         nfscl_lockderef(lckp);
7026                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
7027                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
7028                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
7029                         (void) nfs_catnap(PZERO, error, "nfs_allocate");
7030                 } else if ((error == NFSERR_EXPIRED ||
7031                     error == NFSERR_BADSTATEID) && clidrev != 0) {
7032                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
7033                 }
7034                 retrycnt++;
7035         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
7036             error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
7037             error == NFSERR_STALEDONTRECOVER ||
7038             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
7039             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
7040              expireret == 0 && clidrev != 0 && retrycnt < 4));
7041         if (error != 0 && retrycnt >= 4)
7042                 error = EIO;
7043         return (error);
7044 }
7045
7046 /*
7047  * The allocate RPC.
7048  */
7049 static int
7050 nfsrpc_allocaterpc(vnode_t vp, off_t off, off_t len, nfsv4stateid_t *stateidp,
7051     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p,
7052     void *stuff)
7053 {
7054         uint32_t *tl;
7055         int error;
7056         struct nfsrv_descript nfsd;
7057         struct nfsrv_descript *nd = &nfsd;
7058         nfsattrbit_t attrbits;
7059
7060         *attrflagp = 0;
7061         NFSCL_REQSTART(nd, NFSPROC_ALLOCATE, vp);
7062         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
7063         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
7064         txdr_hyper(off, tl); tl += 2;
7065         txdr_hyper(len, tl); tl += 2;
7066         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7067         NFSGETATTR_ATTRBIT(&attrbits);
7068         nfsrv_putattrbit(nd, &attrbits);
7069         error = nfscl_request(nd, vp, p, cred, stuff);
7070         if (error != 0)
7071                 return (error);
7072         if (nd->nd_repstat == 0) {
7073                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7074                 error = nfsm_loadattr(nd, nap);
7075                 if (error == 0)
7076                         *attrflagp = NFS_LATTR_NOSHRINK;
7077         } else
7078                 error = nd->nd_repstat;
7079 nfsmout:
7080         m_freem(nd->nd_mrep);
7081         return (error);
7082 }
7083
7084 /*
7085  * Set up the XDR arguments for the LayoutGet operation.
7086  */
7087 static void
7088 nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
7089     uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype,
7090     int layoutlen, int usecurstateid)
7091 {
7092         uint32_t *tl;
7093
7094         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
7095             NFSX_STATEID);
7096         *tl++ = newnfs_false;           /* Don't signal availability. */
7097         *tl++ = txdr_unsigned(layouttype);
7098         *tl++ = txdr_unsigned(iomode);
7099         txdr_hyper(offset, tl);
7100         tl += 2;
7101         txdr_hyper(len, tl);
7102         tl += 2;
7103         txdr_hyper(minlen, tl);
7104         tl += 2;
7105         if (usecurstateid != 0) {
7106                 /* Special stateid for Current stateid. */
7107                 *tl++ = txdr_unsigned(1);
7108                 *tl++ = 0;
7109                 *tl++ = 0;
7110                 *tl++ = 0;
7111         } else {
7112                 *tl++ = txdr_unsigned(stateidp->seqid);
7113                 NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
7114                 *tl++ = stateidp->other[0];
7115                 *tl++ = stateidp->other[1];
7116                 *tl++ = stateidp->other[2];
7117         }
7118         *tl = txdr_unsigned(layoutlen);
7119 }
7120
7121 /*
7122  * Parse the reply for a successful LayoutGet operation.
7123  */
7124 static int
7125 nfsrv_parselayoutget(struct nfsmount *nmp, struct nfsrv_descript *nd,
7126     nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp)
7127 {
7128         uint32_t *tl;
7129         struct nfsclflayout *flp, *prevflp, *tflp;
7130         int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen;
7131         int m, mirrorcnt;
7132         uint64_t retlen, off;
7133         struct nfsfh *nfhp;
7134         uint8_t *cp;
7135         uid_t user;
7136         gid_t grp;
7137
7138         NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n");
7139         error = 0;
7140         flp = NULL;
7141         gotiomode = -1;
7142         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
7143         if (*tl++ != 0)
7144                 *retonclosep = 1;
7145         else
7146                 *retonclosep = 0;
7147         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
7148         NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
7149             (int)stateidp->seqid);
7150         stateidp->other[0] = *tl++;
7151         stateidp->other[1] = *tl++;
7152         stateidp->other[2] = *tl++;
7153         cnt = fxdr_unsigned(int, *tl);
7154         NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
7155         if (cnt <= 0 || cnt > 10000) {
7156                 /* Don't accept more than 10000 layouts in reply. */
7157                 error = NFSERR_BADXDR;
7158                 goto nfsmout;
7159         }
7160         for (i = 0; i < cnt; i++) {
7161                 /* Dissect to the layout type. */
7162                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER +
7163                     3 * NFSX_UNSIGNED);
7164                 off = fxdr_hyper(tl); tl += 2;
7165                 retlen = fxdr_hyper(tl); tl += 2;
7166                 iomode = fxdr_unsigned(int, *tl++);
7167                 laytype = fxdr_unsigned(int, *tl);
7168                 NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype,
7169                     (uintmax_t)off, (uintmax_t)retlen, iomode);
7170                 /* Ignore length of layout body for now. */
7171                 if (laytype == NFSLAYOUT_NFSV4_1_FILES) {
7172                         /* Parse the File layout up to fhcnt. */
7173                         NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED +
7174                             NFSX_HYPER + NFSX_V4DEVICEID);
7175                         fhcnt = fxdr_unsigned(int, *(tl + 4 +
7176                             NFSX_V4DEVICEID / NFSX_UNSIGNED));
7177                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7178                         if (fhcnt < 0 || fhcnt > 100) {
7179                                 /* Don't accept more than 100 file handles. */
7180                                 error = NFSERR_BADXDR;
7181                                 goto nfsmout;
7182                         }
7183                         if (fhcnt > 0)
7184                                 flp = malloc(sizeof(*flp) + fhcnt *
7185                                     sizeof(struct nfsfh *), M_NFSFLAYOUT,
7186                                     M_WAITOK);
7187                         else
7188                                 flp = malloc(sizeof(*flp), M_NFSFLAYOUT,
7189                                     M_WAITOK);
7190                         flp->nfsfl_flags = NFSFL_FILE;
7191                         flp->nfsfl_fhcnt = 0;
7192                         flp->nfsfl_devp = NULL;
7193                         flp->nfsfl_off = off;
7194                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7195                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7196                         else
7197                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
7198                         flp->nfsfl_iomode = iomode;
7199                         if (gotiomode == -1)
7200                                 gotiomode = flp->nfsfl_iomode;
7201                         /* Ignore layout body length for now. */
7202                         NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
7203                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7204                         flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
7205                         NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
7206                         mtx_lock(&nmp->nm_mtx);
7207                         if (nmp->nm_minorvers > 1 && (flp->nfsfl_util &
7208                             NFSFLAYUTIL_IOADVISE_THRU_MDS) != 0)
7209                                 nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7210                         mtx_unlock(&nmp->nm_mtx);
7211                         flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
7212                         flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
7213                         NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n",
7214                             flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff);
7215                         for (j = 0; j < fhcnt; j++) {
7216                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7217                                 nfhlen = fxdr_unsigned(int, *tl);
7218                                 if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
7219                                         error = NFSERR_BADXDR;
7220                                         goto nfsmout;
7221                                 }
7222                                 nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
7223                                     M_NFSFH, M_WAITOK);
7224                                 flp->nfsfl_fh[j] = nfhp;
7225                                 flp->nfsfl_fhcnt++;
7226                                 nfhp->nfh_len = nfhlen;
7227                                 NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
7228                                 NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
7229                         }
7230                 } else if (laytype == NFSLAYOUT_FLEXFILE) {
7231                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED +
7232                             NFSX_HYPER);
7233                         mirrorcnt = fxdr_unsigned(int, *(tl + 2));
7234                         NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt);
7235                         if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) {
7236                                 error = NFSERR_BADXDR;
7237                                 goto nfsmout;
7238                         }
7239                         flp = malloc(sizeof(*flp) + mirrorcnt *
7240                             sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK);
7241                         flp->nfsfl_flags = NFSFL_FLEXFILE;
7242                         flp->nfsfl_mirrorcnt = mirrorcnt;
7243                         for (j = 0; j < mirrorcnt; j++)
7244                                 flp->nfsfl_ffm[j].devp = NULL;
7245                         flp->nfsfl_off = off;
7246                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7247                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7248                         else
7249                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
7250                         flp->nfsfl_iomode = iomode;
7251                         if (gotiomode == -1)
7252                                 gotiomode = flp->nfsfl_iomode;
7253                         flp->nfsfl_stripeunit = fxdr_hyper(tl);
7254                         NFSCL_DEBUG(4, "stripeunit=%ju\n",
7255                             (uintmax_t)flp->nfsfl_stripeunit);
7256                         for (j = 0; j < mirrorcnt; j++) {
7257                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7258                                 k = fxdr_unsigned(int, *tl);
7259                                 if (k < 1 || k > 128) {
7260                                         error = NFSERR_BADXDR;
7261                                         goto nfsmout;
7262                                 }
7263                                 NFSCL_DEBUG(4, "servercnt=%d\n", k);
7264                                 for (l = 0; l < k; l++) {
7265                                         NFSM_DISSECT(tl, uint32_t *,
7266                                             NFSX_V4DEVICEID + NFSX_STATEID +
7267                                             2 * NFSX_UNSIGNED);
7268                                         if (l == 0) {
7269                                                 /* Just use the first server. */
7270                                                 NFSBCOPY(tl,
7271                                                     flp->nfsfl_ffm[j].dev,
7272                                                     NFSX_V4DEVICEID);
7273                                                 tl += (NFSX_V4DEVICEID /
7274                                                     NFSX_UNSIGNED);
7275                                                 tl++;
7276                                                 flp->nfsfl_ffm[j].st.seqid =
7277                                                     *tl++;
7278                                                 flp->nfsfl_ffm[j].st.other[0] =
7279                                                     *tl++;
7280                                                 flp->nfsfl_ffm[j].st.other[1] =
7281                                                     *tl++;
7282                                                 flp->nfsfl_ffm[j].st.other[2] =
7283                                                     *tl++;
7284                                                 NFSCL_DEBUG(4, "st.seqid=%u "
7285                                                  "st.o0=0x%x st.o1=0x%x "
7286                                                  "st.o2=0x%x\n",
7287                                                  flp->nfsfl_ffm[j].st.seqid,
7288                                                  flp->nfsfl_ffm[j].st.other[0],
7289                                                  flp->nfsfl_ffm[j].st.other[1],
7290                                                  flp->nfsfl_ffm[j].st.other[2]);
7291                                         } else
7292                                                 tl += ((NFSX_V4DEVICEID +
7293                                                     NFSX_STATEID +
7294                                                     NFSX_UNSIGNED) /
7295                                                     NFSX_UNSIGNED);
7296                                         fhcnt = fxdr_unsigned(int, *tl);
7297                                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7298                                         if (fhcnt < 1 ||
7299                                             fhcnt > NFSDEV_MAXVERS) {
7300                                                 error = NFSERR_BADXDR;
7301                                                 goto nfsmout;
7302                                         }
7303                                         for (m = 0; m < fhcnt; m++) {
7304                                                 NFSM_DISSECT(tl, uint32_t *,
7305                                                     NFSX_UNSIGNED);
7306                                                 nfhlen = fxdr_unsigned(int,
7307                                                     *tl);
7308                                                 NFSCL_DEBUG(4, "nfhlen=%d\n",
7309                                                     nfhlen);
7310                                                 if (nfhlen <= 0 || nfhlen >
7311                                                     NFSX_V4FHMAX) {
7312                                                         error = NFSERR_BADXDR;
7313                                                         goto nfsmout;
7314                                                 }
7315                                                 NFSM_DISSECT(cp, uint8_t *,
7316                                                     NFSM_RNDUP(nfhlen));
7317                                                 if (l == 0) {
7318                                                         flp->nfsfl_ffm[j].fhcnt 
7319                                                             = fhcnt;
7320                                                         nfhp = malloc(
7321                                                             sizeof(*nfhp) +
7322                                                             nfhlen - 1, M_NFSFH,
7323                                                             M_WAITOK);
7324                                                         flp->nfsfl_ffm[j].fh[m]
7325                                                             = nfhp;
7326                                                         nfhp->nfh_len = nfhlen;
7327                                                         NFSBCOPY(cp,
7328                                                             nfhp->nfh_fh,
7329                                                             nfhlen);
7330                                                         NFSCL_DEBUG(4,
7331                                                             "got fh\n");
7332                                                 }
7333                                         }
7334                                         /* Now, get the ffsd_user/ffds_group. */
7335                                         error = nfsrv_parseug(nd, 0, &user,
7336                                             &grp, curthread);
7337                                         NFSCL_DEBUG(4, "after parseu=%d\n",
7338                                             error);
7339                                         if (error == 0)
7340                                                 error = nfsrv_parseug(nd, 1,
7341                                                     &user, &grp, curthread);
7342                                         NFSCL_DEBUG(4, "aft parseg=%d\n",
7343                                             grp);
7344                                         if (error != 0)
7345                                                 goto nfsmout;
7346                                         NFSCL_DEBUG(4, "user=%d group=%d\n",
7347                                             user, grp);
7348                                         if (l == 0) {
7349                                                 flp->nfsfl_ffm[j].user = user;
7350                                                 flp->nfsfl_ffm[j].group = grp;
7351                                                 NFSCL_DEBUG(4,
7352                                                     "usr=%d grp=%d\n", user,
7353                                                     grp);
7354                                         }
7355                                 }
7356                         }
7357                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7358                         flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++);
7359 #ifdef notnow
7360                         /*
7361                          * At this time, there is no flag.
7362                          * NFSFLEXFLAG_IOADVISE_THRU_MDS might need to be
7363                          * added, or it may never exist?
7364                          */
7365                         mtx_lock(&nmp->nm_mtx);
7366                         if (nmp->nm_minorvers > 1 && (flp->nfsfl_fflags &
7367                             NFSFLEXFLAG_IOADVISE_THRU_MDS) != 0)
7368                                 nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7369                         mtx_unlock(&nmp->nm_mtx);
7370 #endif
7371                         flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl);
7372                         NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n",
7373                             flp->nfsfl_fflags, flp->nfsfl_statshint);
7374                 } else {
7375                         error = NFSERR_BADXDR;
7376                         goto nfsmout;
7377                 }
7378                 if (flp->nfsfl_iomode == gotiomode) {
7379                         /* Keep the list in increasing offset order. */
7380                         tflp = LIST_FIRST(flhp);
7381                         prevflp = NULL;
7382                         while (tflp != NULL &&
7383                             tflp->nfsfl_off < flp->nfsfl_off) {
7384                                 prevflp = tflp;
7385                                 tflp = LIST_NEXT(tflp, nfsfl_list);
7386                         }
7387                         if (prevflp == NULL)
7388                                 LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
7389                         else
7390                                 LIST_INSERT_AFTER(prevflp, flp,
7391                                     nfsfl_list);
7392                         NFSCL_DEBUG(4, "flp inserted\n");
7393                 } else {
7394                         printf("nfscl_layoutget(): got wrong iomode\n");
7395                         nfscl_freeflayout(flp);
7396                 }
7397                 flp = NULL;
7398         }
7399 nfsmout:
7400         NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error);
7401         if (error != 0 && flp != NULL)
7402                 nfscl_freeflayout(flp);
7403         return (error);
7404 }
7405
7406 /*
7407  * Parse a user/group digit string.
7408  */
7409 static int
7410 nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
7411     NFSPROC_T *p)
7412 {
7413         uint32_t *tl;
7414         char *cp, *str, str0[NFSV4_SMALLSTR + 1];
7415         uint32_t len = 0;
7416         int error = 0;
7417
7418         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7419         len = fxdr_unsigned(uint32_t, *tl);
7420         str = NULL;
7421         if (len > NFSV4_OPAQUELIMIT) {
7422                 error = NFSERR_BADXDR;
7423                 goto nfsmout;
7424         }
7425         NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len);
7426         if (len == 0) {
7427                 if (dogrp != 0)
7428                         *gidp = GID_NOGROUP;
7429                 else
7430                         *uidp = UID_NOBODY;
7431                 return (0);
7432         }
7433         if (len > NFSV4_SMALLSTR)
7434                 str = malloc(len + 1, M_TEMP, M_WAITOK);
7435         else
7436                 str = str0;
7437         NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
7438         NFSBCOPY(cp, str, len);
7439         str[len] = '\0';
7440         NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
7441         if (dogrp != 0)
7442                 error = nfsv4_strtogid(nd, str, len, gidp);
7443         else
7444                 error = nfsv4_strtouid(nd, str, len, uidp);
7445 nfsmout:
7446         if (len > NFSV4_SMALLSTR)
7447                 free(str, M_TEMP);
7448         NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error);
7449         return (error);
7450 }
7451
7452 /*
7453  * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
7454  * so that it does both an Open and a Layoutget.
7455  */
7456 static int
7457 nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7458     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7459     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7460     struct ucred *cred, NFSPROC_T *p)
7461 {
7462         struct nfscllayout *lyp;
7463         struct nfsclflayout *flp;
7464         struct nfsclflayouthead flh;
7465         int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
7466         int layouttype, laystat;
7467         nfsv4stateid_t stateid;
7468         struct nfsclsession *tsep;
7469
7470         error = 0;
7471         if (NFSHASFLEXFILE(nmp))
7472                 layouttype = NFSLAYOUT_FLEXFILE;
7473         else
7474                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
7475         /*
7476          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
7477          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
7478          * flp == NULL.
7479          */
7480         lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp,
7481             &recalled);
7482         NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
7483         if (lyp == NULL)
7484                 islocked = 0;
7485         else if (flp != NULL)
7486                 islocked = 1;
7487         else
7488                 islocked = 2;
7489         if ((lyp == NULL || flp == NULL) && recalled == 0) {
7490                 LIST_INIT(&flh);
7491                 tsep = nfsmnt_mdssession(nmp);
7492                 layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
7493                     3 * NFSX_UNSIGNED);
7494                 if (lyp == NULL)
7495                         usecurstateid = 1;
7496                 else {
7497                         usecurstateid = 0;
7498                         stateid.seqid = lyp->nfsly_stateid.seqid;
7499                         stateid.other[0] = lyp->nfsly_stateid.other[0];
7500                         stateid.other[1] = lyp->nfsly_stateid.other[1];
7501                         stateid.other[2] = lyp->nfsly_stateid.other[2];
7502                 }
7503                 error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
7504                     newfhp, newfhlen, mode, op, name, namelen,
7505                     dpp, &stateid, usecurstateid, layouttype, layoutlen,
7506                     &retonclose, &flh, &laystat, cred, p);
7507                 NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
7508                     laystat, error);
7509                 laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
7510                     &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat,
7511                     &islocked, cred, p);
7512         } else
7513                 error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
7514                     mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
7515         if (islocked == 2)
7516                 nfscl_rellayout(lyp, 1);
7517         else if (islocked == 1)
7518                 nfscl_rellayout(lyp, 0);
7519         return (error);
7520 }
7521
7522 /*
7523  * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
7524  * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
7525  * handled by nfsrpc_openrpc().
7526  * For the case where op == NULL, dvp is the directory.  When op != NULL, it
7527  * can be NULL.
7528  */
7529 static int
7530 nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7531     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7532     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7533     nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
7534     int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
7535     int *laystatp, struct ucred *cred, NFSPROC_T *p)
7536 {
7537         uint32_t *tl;
7538         struct nfsrv_descript nfsd, *nd = &nfsd;
7539         struct nfscldeleg *ndp = NULL;
7540         struct nfsvattr nfsva;
7541         struct nfsclsession *tsep;
7542         uint32_t rflags, deleg;
7543         nfsattrbit_t attrbits;
7544         int error, ret, acesize, limitby, iomode;
7545
7546         *dpp = NULL;
7547         *laystatp = ENXIO;
7548         nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
7549             0, 0);
7550         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
7551         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
7552         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
7553         *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
7554         tsep = nfsmnt_mdssession(nmp);
7555         *tl++ = tsep->nfsess_clientid.lval[0];
7556         *tl = tsep->nfsess_clientid.lval[1];
7557         nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7558         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7559         *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
7560         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7561         nfsm_strtom(nd, name, namelen);
7562         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7563         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7564         NFSZERO_ATTRBIT(&attrbits);
7565         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
7566         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
7567         nfsrv_putattrbit(nd, &attrbits);
7568         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7569         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7570         if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
7571                 iomode = NFSLAYOUTIOMODE_RW;
7572         else
7573                 iomode = NFSLAYOUTIOMODE_READ;
7574         nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
7575             layouttype, layoutlen, usecurstateid);
7576         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
7577             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
7578         if (error != 0)
7579                 return (error);
7580         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
7581         if (nd->nd_repstat != 0)
7582                 *laystatp = nd->nd_repstat;
7583         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7584                 /* ND_NOMOREDATA will be set if the Open operation failed. */
7585                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7586                     6 * NFSX_UNSIGNED);
7587                 op->nfso_stateid.seqid = *tl++;
7588                 op->nfso_stateid.other[0] = *tl++;
7589                 op->nfso_stateid.other[1] = *tl++;
7590                 op->nfso_stateid.other[2] = *tl;
7591                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
7592                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7593                 if (error != 0)
7594                         goto nfsmout;
7595                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7596                 deleg = fxdr_unsigned(u_int32_t, *tl);
7597                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
7598                     deleg == NFSV4OPEN_DELEGATEWRITE) {
7599                         if (!(op->nfso_own->nfsow_clp->nfsc_flags &
7600                               NFSCLFLAGS_FIRSTDELEG))
7601                                 op->nfso_own->nfsow_clp->nfsc_flags |=
7602                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
7603                         ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
7604                             M_NFSCLDELEG, M_WAITOK);
7605                         LIST_INIT(&ndp->nfsdl_owner);
7606                         LIST_INIT(&ndp->nfsdl_lock);
7607                         ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
7608                         ndp->nfsdl_fhlen = newfhlen;
7609                         NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
7610                         newnfs_copyincred(cred, &ndp->nfsdl_cred);
7611                         nfscl_lockinit(&ndp->nfsdl_rwlock);
7612                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7613                             NFSX_UNSIGNED);
7614                         ndp->nfsdl_stateid.seqid = *tl++;
7615                         ndp->nfsdl_stateid.other[0] = *tl++;
7616                         ndp->nfsdl_stateid.other[1] = *tl++;
7617                         ndp->nfsdl_stateid.other[2] = *tl++;
7618                         ret = fxdr_unsigned(int, *tl);
7619                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
7620                                 ndp->nfsdl_flags = NFSCLDL_WRITE;
7621                                 /*
7622                                  * Indicates how much the file can grow.
7623                                  */
7624                                 NFSM_DISSECT(tl, u_int32_t *,
7625                                     3 * NFSX_UNSIGNED);
7626                                 limitby = fxdr_unsigned(int, *tl++);
7627                                 switch (limitby) {
7628                                 case NFSV4OPEN_LIMITSIZE:
7629                                         ndp->nfsdl_sizelimit = fxdr_hyper(tl);
7630                                         break;
7631                                 case NFSV4OPEN_LIMITBLOCKS:
7632                                         ndp->nfsdl_sizelimit =
7633                                             fxdr_unsigned(u_int64_t, *tl++);
7634                                         ndp->nfsdl_sizelimit *=
7635                                             fxdr_unsigned(u_int64_t, *tl);
7636                                         break;
7637                                 default:
7638                                         error = NFSERR_BADXDR;
7639                                         goto nfsmout;
7640                                 };
7641                         } else
7642                                 ndp->nfsdl_flags = NFSCLDL_READ;
7643                         if (ret != 0)
7644                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
7645                         error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
7646                             &acesize, p);
7647                         if (error != 0)
7648                                 goto nfsmout;
7649                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
7650                         error = NFSERR_BADXDR;
7651                         goto nfsmout;
7652                 }
7653                 if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
7654                     nfscl_assumeposixlocks)
7655                         op->nfso_posixlock = 1;
7656                 else
7657                         op->nfso_posixlock = 0;
7658                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7659                 /* If the 2nd element == NFS_OK, the Getattr succeeded. */
7660                 if (*++tl == 0) {
7661                         error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
7662                             NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
7663                             NULL, NULL, NULL, p, cred);
7664                         if (error != 0)
7665                                 goto nfsmout;
7666                         if (ndp != NULL) {
7667                                 ndp->nfsdl_change = nfsva.na_filerev;
7668                                 ndp->nfsdl_modtime = nfsva.na_mtime;
7669                                 ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7670                                 *dpp = ndp;
7671                                 ndp = NULL;
7672                         }
7673                         /*
7674                          * At this point, the Open has succeeded, so set
7675                          * nd_repstat = NFS_OK.  If the Layoutget failed,
7676                          * this function just won't return a layout.
7677                          */
7678                         if (nd->nd_repstat == 0) {
7679                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7680                                 *laystatp = fxdr_unsigned(int, *++tl);
7681                                 if (*laystatp == 0) {
7682                                         error = nfsrv_parselayoutget(nmp, nd,
7683                                             stateidp, retonclosep, flhp);
7684                                         if (error != 0)
7685                                                 *laystatp = error;
7686                                 }
7687                         } else
7688                                 nd->nd_repstat = 0;     /* Return 0 for Open. */
7689                 }
7690         }
7691         if (nd->nd_repstat != 0 && error == 0)
7692                 error = nd->nd_repstat;
7693 nfsmout:
7694         free(ndp, M_NFSCLDELEG);
7695         m_freem(nd->nd_mrep);
7696         return (error);
7697 }
7698
7699 /*
7700  * Similar nfsrpc_createv4(), but also does the LayoutGet operation.
7701  * Used only for mounts with pNFS enabled.
7702  */
7703 static int
7704 nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7705     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7706     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7707     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7708     int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp,
7709     int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
7710     struct nfsclflayouthead *flhp, int *laystatp)
7711 {
7712         uint32_t *tl;
7713         int error = 0, deleg, newone, ret, acesize, limitby;
7714         struct nfsrv_descript nfsd, *nd = &nfsd;
7715         struct nfsclopen *op;
7716         struct nfscldeleg *dp = NULL;
7717         struct nfsnode *np;
7718         struct nfsfh *nfhp;
7719         struct nfsclsession *tsep;
7720         nfsattrbit_t attrbits;
7721         nfsv4stateid_t stateid;
7722         struct nfsmount *nmp;
7723
7724         nmp = VFSTONFS(dvp->v_mount);
7725         np = VTONFS(dvp);
7726         *laystatp = ENXIO;
7727         *unlockedp = 0;
7728         *nfhpp = NULL;
7729         *dpp = NULL;
7730         *attrflagp = 0;
7731         *dattrflagp = 0;
7732         if (namelen > NFS_MAXNAMLEN)
7733                 return (ENAMETOOLONG);
7734         NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp);
7735         /*
7736          * For V4, this is actually an Open op.
7737          */
7738         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
7739         *tl++ = txdr_unsigned(owp->nfsow_seqid);
7740         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
7741             NFSV4OPEN_ACCESSREAD);
7742         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
7743         tsep = nfsmnt_mdssession(nmp);
7744         *tl++ = tsep->nfsess_clientid.lval[0];
7745         *tl = tsep->nfsess_clientid.lval[1];
7746         nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7747         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7748         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
7749         if ((fmode & O_EXCL) != 0) {
7750                 if (NFSHASSESSPERSIST(nmp)) {
7751                         /* Use GUARDED for persistent sessions. */
7752                         *tl = txdr_unsigned(NFSCREATE_GUARDED);
7753                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
7754                 } else {
7755                         /* Otherwise, use EXCLUSIVE4_1. */
7756                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
7757                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
7758                         *tl++ = cverf.lval[0];
7759                         *tl = cverf.lval[1];
7760                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
7761                 }
7762         } else {
7763                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
7764                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
7765         }
7766         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7767         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7768         nfsm_strtom(nd, name, namelen);
7769         /* Get the new file's handle and attributes, plus save the FH. */
7770         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
7771         *tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
7772         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
7773         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7774         NFSGETATTR_ATTRBIT(&attrbits);
7775         nfsrv_putattrbit(nd, &attrbits);
7776         /* Get the directory's post-op attributes. */
7777         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7778         *tl = txdr_unsigned(NFSV4OP_PUTFH);
7779         nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
7780         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
7781         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7782         nfsrv_putattrbit(nd, &attrbits);
7783         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7784         *tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
7785         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7786         nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
7787             layouttype, layoutlen, usecurstateid);
7788         error = nfscl_request(nd, dvp, p, cred, dstuff);
7789         if (error != 0)
7790                 return (error);
7791         NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
7792             error);
7793         if (nd->nd_repstat != 0)
7794                 *laystatp = nd->nd_repstat;
7795         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
7796         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7797                 NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
7798                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7799                     6 * NFSX_UNSIGNED);
7800                 stateid.seqid = *tl++;
7801                 stateid.other[0] = *tl++;
7802                 stateid.other[1] = *tl++;
7803                 stateid.other[2] = *tl;
7804                 nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7805                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7806                 deleg = fxdr_unsigned(int, *tl);
7807                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
7808                     deleg == NFSV4OPEN_DELEGATEWRITE) {
7809                         if (!(owp->nfsow_clp->nfsc_flags &
7810                               NFSCLFLAGS_FIRSTDELEG))
7811                                 owp->nfsow_clp->nfsc_flags |=
7812                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
7813                         dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
7814                             M_NFSCLDELEG, M_WAITOK);
7815                         LIST_INIT(&dp->nfsdl_owner);
7816                         LIST_INIT(&dp->nfsdl_lock);
7817                         dp->nfsdl_clp = owp->nfsow_clp;
7818                         newnfs_copyincred(cred, &dp->nfsdl_cred);
7819                         nfscl_lockinit(&dp->nfsdl_rwlock);
7820                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7821                             NFSX_UNSIGNED);
7822                         dp->nfsdl_stateid.seqid = *tl++;
7823                         dp->nfsdl_stateid.other[0] = *tl++;
7824                         dp->nfsdl_stateid.other[1] = *tl++;
7825                         dp->nfsdl_stateid.other[2] = *tl++;
7826                         ret = fxdr_unsigned(int, *tl);
7827                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
7828                                 dp->nfsdl_flags = NFSCLDL_WRITE;
7829                                 /*
7830                                  * Indicates how much the file can grow.
7831                                  */
7832                                 NFSM_DISSECT(tl, u_int32_t *,
7833                                     3 * NFSX_UNSIGNED);
7834                                 limitby = fxdr_unsigned(int, *tl++);
7835                                 switch (limitby) {
7836                                 case NFSV4OPEN_LIMITSIZE:
7837                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
7838                                         break;
7839                                 case NFSV4OPEN_LIMITBLOCKS:
7840                                         dp->nfsdl_sizelimit =
7841                                             fxdr_unsigned(u_int64_t, *tl++);
7842                                         dp->nfsdl_sizelimit *=
7843                                             fxdr_unsigned(u_int64_t, *tl);
7844                                         break;
7845                                 default:
7846                                         error = NFSERR_BADXDR;
7847                                         goto nfsmout;
7848                                 };
7849                         } else {
7850                                 dp->nfsdl_flags = NFSCLDL_READ;
7851                         }
7852                         if (ret != 0)
7853                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
7854                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
7855                             &acesize, p);
7856                         if (error != 0)
7857                                 goto nfsmout;
7858                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
7859                         error = NFSERR_BADXDR;
7860                         goto nfsmout;
7861                 }
7862
7863                 /* Now, we should have the status for the SaveFH. */
7864                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7865                 if (*++tl == 0) {
7866                         NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
7867                         /*
7868                          * Now, process the GetFH and Getattr for the newly
7869                          * created file. nfscl_mtofh() will set
7870                          * ND_NOMOREDATA if these weren't successful.
7871                          */
7872                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
7873                         NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
7874                         if (error != 0)
7875                                 goto nfsmout;
7876                 } else
7877                         nd->nd_flag |= ND_NOMOREDATA;
7878                 /* Now we have the PutFH and Getattr for the directory. */
7879                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7880                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7881                         if (*++tl != 0)
7882                                 nd->nd_flag |= ND_NOMOREDATA;
7883                         else {
7884                                 NFSM_DISSECT(tl, uint32_t *, 2 *
7885                                     NFSX_UNSIGNED);
7886                                 if (*++tl != 0)
7887                                         nd->nd_flag |= ND_NOMOREDATA;
7888                         }
7889                 }
7890                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7891                         /* Load the directory attributes. */
7892                         error = nfsm_loadattr(nd, dnap);
7893                         NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
7894                         if (error != 0)
7895                                 goto nfsmout;
7896                         *dattrflagp = 1;
7897                         if (dp != NULL && *attrflagp != 0) {
7898                                 dp->nfsdl_change = nnap->na_filerev;
7899                                 dp->nfsdl_modtime = nnap->na_mtime;
7900                                 dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7901                         }
7902                         /*
7903                          * We can now complete the Open state.
7904                          */
7905                         nfhp = *nfhpp;
7906                         if (dp != NULL) {
7907                                 dp->nfsdl_fhlen = nfhp->nfh_len;
7908                                 NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
7909                                     nfhp->nfh_len);
7910                         }
7911                         /*
7912                          * Get an Open structure that will be
7913                          * attached to the OpenOwner, acquired already.
7914                          */
7915                         error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
7916                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
7917                             cred, p, NULL, &op, &newone, NULL, 0);
7918                         if (error != 0)
7919                                 goto nfsmout;
7920                         op->nfso_stateid = stateid;
7921                         newnfs_copyincred(cred, &op->nfso_cred);
7922
7923                         nfscl_openrelease(nmp, op, error, newone);
7924                         *unlockedp = 1;
7925
7926                         /* Now, handle the RestoreFH and LayoutGet. */
7927                         if (nd->nd_repstat == 0) {
7928                                 NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
7929                                 *laystatp = fxdr_unsigned(int, *(tl + 3));
7930                                 if (*laystatp == 0) {
7931                                         error = nfsrv_parselayoutget(nmp, nd,
7932                                             stateidp, retonclosep, flhp);
7933                                         if (error != 0)
7934                                                 *laystatp = error;
7935                                 }
7936                                 NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
7937                                     error);
7938                         } else
7939                                 nd->nd_repstat = 0;
7940                 }
7941         }
7942         if (nd->nd_repstat != 0 && error == 0)
7943                 error = nd->nd_repstat;
7944         if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION)
7945                 nfscl_initiate_recovery(owp->nfsow_clp);
7946 nfsmout:
7947         NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
7948         if (error == 0)
7949                 *dpp = dp;
7950         else
7951                 free(dp, M_NFSCLDELEG);
7952         m_freem(nd->nd_mrep);
7953         return (error);
7954 }
7955
7956 /*
7957  * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
7958  */
7959 static int
7960 nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7961     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7962     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7963     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7964     int *dattrflagp, void *dstuff, int *unlockedp)
7965 {
7966         struct nfscllayout *lyp;
7967         struct nfsclflayouthead flh;
7968         struct nfsfh *nfhp;
7969         struct nfsclsession *tsep;
7970         struct nfsmount *nmp;
7971         nfsv4stateid_t stateid;
7972         int error, layoutlen, layouttype, retonclose, laystat;
7973
7974         error = 0;
7975         nmp = VFSTONFS(dvp->v_mount);
7976         if (NFSHASFLEXFILE(nmp))
7977                 layouttype = NFSLAYOUT_FLEXFILE;
7978         else
7979                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
7980         LIST_INIT(&flh);
7981         tsep = nfsmnt_mdssession(nmp);
7982         layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
7983         error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
7984             owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
7985             dstuff, unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
7986             &flh, &laystat);
7987         NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
7988             laystat, error);
7989         lyp = NULL;
7990         if (laystat == 0) {
7991                 nfhp = *nfhpp;
7992                 laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
7993                     nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
7994                     layouttype, laystat, NULL, cred, p);
7995         } else
7996                 laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
7997                     retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL,
7998                     cred, p);
7999         if (laystat == 0)
8000                 nfscl_rellayout(lyp, 0);
8001         return (error);
8002 }
8003
8004 /*
8005  * Process the results of a layoutget() operation.
8006  */
8007 static int
8008 nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
8009     int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
8010     struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
8011     int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
8012 {
8013         struct nfsclflayout *tflp;
8014         struct nfscldevinfo *dip;
8015         uint8_t *dev;
8016         int i, mirrorcnt;
8017
8018         if (laystat == NFSERR_UNKNLAYOUTTYPE) {
8019                 NFSLOCKMNT(nmp);
8020                 if (!NFSHASFLEXFILE(nmp)) {
8021                         /* Switch to using Flex File Layout. */
8022                         nmp->nm_state |= NFSSTA_FLEXFILE;
8023                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
8024                         /* Disable pNFS. */
8025                         NFSCL_DEBUG(1, "disable PNFS\n");
8026                         nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
8027                 }
8028                 NFSUNLOCKMNT(nmp);
8029         }
8030         if (laystat == 0) {
8031                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
8032                 LIST_FOREACH(tflp, flhp, nfsfl_list) {
8033                         if (layouttype == NFSLAYOUT_FLEXFILE)
8034                                 mirrorcnt = tflp->nfsfl_mirrorcnt;
8035                         else
8036                                 mirrorcnt = 1;
8037                         for (i = 0; i < mirrorcnt; i++) {
8038                                 laystat = nfscl_adddevinfo(nmp, NULL, i, tflp);
8039                                 NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
8040                                 if (laystat != 0) {
8041                                         if (layouttype == NFSLAYOUT_FLEXFILE)
8042                                                 dev = tflp->nfsfl_ffm[i].dev;
8043                                         else
8044                                                 dev = tflp->nfsfl_dev;
8045                                         laystat = nfsrpc_getdeviceinfo(nmp, dev,
8046                                             layouttype, notifybit, &dip, cred,
8047                                             p);
8048                                         NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
8049                                             laystat);
8050                                         if (laystat != 0)
8051                                                 goto out;
8052                                         laystat = nfscl_adddevinfo(nmp, dip, i,
8053                                             tflp);
8054                                         if (laystat != 0)
8055                                                 printf("nfsrpc_layoutgetresout"
8056                                                     ": cannot add\n");
8057                                 }
8058                         }
8059                 }
8060         }
8061 out:
8062         if (laystat == 0) {
8063                 /*
8064                  * nfscl_layout() always returns with the nfsly_lock
8065                  * set to a refcnt (shared lock).
8066                  * Passing in dvp is sufficient, since it is only used to
8067                  * get the fsid for the file system.
8068                  */
8069                 laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
8070                     layouttype, retonclose, flhp, lypp, cred, p);
8071                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
8072                     laystat);
8073                 if (laystat == 0 && islockedp != NULL)
8074                         *islockedp = 1;
8075         }
8076         return (laystat);
8077 }
8078
8079 /*
8080  * nfs copy_file_range operation.
8081  */
8082 int
8083 nfsrpc_copy_file_range(vnode_t invp, off_t *inoffp, vnode_t outvp,
8084     off_t *outoffp, size_t *lenp, unsigned int flags, int *inattrflagp,
8085     struct nfsvattr *innap, int *outattrflagp, struct nfsvattr *outnap,
8086     struct ucred *cred, bool consecutive, bool *must_commitp)
8087 {
8088         int commit, error, expireret = 0, retrycnt;
8089         u_int32_t clidrev = 0;
8090         struct nfsmount *nmp = VFSTONFS(invp->v_mount);
8091         struct nfsfh *innfhp = NULL, *outnfhp = NULL;
8092         nfsv4stateid_t instateid, outstateid;
8093         void *inlckp, *outlckp;
8094
8095         if (nmp->nm_clp != NULL)
8096                 clidrev = nmp->nm_clp->nfsc_clientidrev;
8097         innfhp = VTONFS(invp)->n_fhp;
8098         outnfhp = VTONFS(outvp)->n_fhp;
8099         retrycnt = 0;
8100         do {
8101                 /* Get both stateids. */
8102                 inlckp = NULL;
8103                 nfscl_getstateid(invp, innfhp->nfh_fh, innfhp->nfh_len,
8104                     NFSV4OPEN_ACCESSREAD, 0, NULL, curthread, &instateid,
8105                     &inlckp);
8106                 outlckp = NULL;
8107                 nfscl_getstateid(outvp, outnfhp->nfh_fh, outnfhp->nfh_len,
8108                     NFSV4OPEN_ACCESSWRITE, 0, NULL, curthread, &outstateid,
8109                     &outlckp);
8110
8111                 error = nfsrpc_copyrpc(invp, *inoffp, outvp, *outoffp, lenp,
8112                     &instateid, &outstateid, innap, inattrflagp, outnap,
8113                     outattrflagp, consecutive, &commit, cred, curthread);
8114                 if (error == 0) {
8115                         if (commit != NFSWRITE_FILESYNC)
8116                                 *must_commitp = true;
8117                         *inoffp += *lenp;
8118                         *outoffp += *lenp;
8119                 } else if (error == NFSERR_STALESTATEID)
8120                         nfscl_initiate_recovery(nmp->nm_clp);
8121                 if (inlckp != NULL)
8122                         nfscl_lockderef(inlckp);
8123                 if (outlckp != NULL)
8124                         nfscl_lockderef(outlckp);
8125                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8126                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8127                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8128                         (void) nfs_catnap(PZERO, error, "nfs_cfr");
8129                 } else if ((error == NFSERR_EXPIRED ||
8130                     error == NFSERR_BADSTATEID) && clidrev != 0) {
8131                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8132                             curthread);
8133                 }
8134                 retrycnt++;
8135         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
8136             error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8137               error == NFSERR_STALEDONTRECOVER ||
8138             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8139             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8140              expireret == 0 && clidrev != 0 && retrycnt < 4));
8141         if (error != 0 && (retrycnt >= 4 ||
8142             error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8143               error == NFSERR_STALEDONTRECOVER))
8144                 error = EIO;
8145         return (error);
8146 }
8147
8148 /*
8149  * The copy RPC.
8150  */
8151 static int
8152 nfsrpc_copyrpc(vnode_t invp, off_t inoff, vnode_t outvp, off_t outoff,
8153     size_t *lenp, nfsv4stateid_t *instateidp, nfsv4stateid_t *outstateidp,
8154     struct nfsvattr *innap, int *inattrflagp, struct nfsvattr *outnap,
8155     int *outattrflagp, bool consecutive, int *commitp, struct ucred *cred,
8156     NFSPROC_T *p)
8157 {
8158         uint32_t *tl;
8159         int error;
8160         struct nfsrv_descript nfsd;
8161         struct nfsrv_descript *nd = &nfsd;
8162         struct nfsmount *nmp;
8163         nfsattrbit_t attrbits;
8164         uint64_t len;
8165
8166         nmp = VFSTONFS(outvp->v_mount);
8167         *inattrflagp = *outattrflagp = 0;
8168         *commitp = NFSWRITE_UNSTABLE;
8169         len = *lenp;
8170         *lenp = 0;
8171         if (len > nfs_maxcopyrange)
8172                 len = nfs_maxcopyrange;
8173         NFSCL_REQSTART(nd, NFSPROC_COPY, invp);
8174         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8175         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8176         NFSGETATTR_ATTRBIT(&attrbits);
8177         nfsrv_putattrbit(nd, &attrbits);
8178         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8179         *tl = txdr_unsigned(NFSV4OP_PUTFH);
8180         nfsm_fhtom(nd, VTONFS(outvp)->n_fhp->nfh_fh,
8181             VTONFS(outvp)->n_fhp->nfh_len, 0);
8182         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8183         *tl = txdr_unsigned(NFSV4OP_COPY);
8184         nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID);
8185         nfsm_stateidtom(nd, outstateidp, NFSSTATEID_PUTSTATEID);
8186         NFSM_BUILD(tl, uint32_t *, 3 * NFSX_HYPER + 4 * NFSX_UNSIGNED);
8187         txdr_hyper(inoff, tl); tl += 2;
8188         txdr_hyper(outoff, tl); tl += 2;
8189         txdr_hyper(len, tl); tl += 2;
8190         if (consecutive)
8191                 *tl++ = newnfs_true;
8192         else
8193                 *tl++ = newnfs_false;
8194         *tl++ = newnfs_true;
8195         *tl++ = 0;
8196         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8197         NFSWRITEGETATTR_ATTRBIT(&attrbits);
8198         nfsrv_putattrbit(nd, &attrbits);
8199         error = nfscl_request(nd, invp, p, cred, NULL);
8200         if (error != 0)
8201                 return (error);
8202         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8203                 /* Get the input file's attributes. */
8204                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8205                 if (*(tl + 1) == 0) {
8206                         error = nfsm_loadattr(nd, innap);
8207                         if (error != 0)
8208                                 goto nfsmout;
8209                         *inattrflagp = 1;
8210                 } else
8211                         nd->nd_flag |= ND_NOMOREDATA;
8212         }
8213         /* Skip over return stat for PutFH. */
8214         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8215                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8216                 if (*++tl != 0)
8217                         nd->nd_flag |= ND_NOMOREDATA;
8218         }
8219         /* Skip over return stat for Copy. */
8220         if ((nd->nd_flag & ND_NOMOREDATA) == 0)
8221                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8222         if (nd->nd_repstat == 0) {
8223                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8224                 if (*tl != 0) {
8225                         /* There should be no callback ids. */
8226                         error = NFSERR_BADXDR;
8227                         goto nfsmout;
8228                 }
8229                 NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED +
8230                     NFSX_VERF);
8231                 len = fxdr_hyper(tl); tl += 2;
8232                 *commitp = fxdr_unsigned(int, *tl++);
8233                 NFSLOCKMNT(nmp);
8234                 if (!NFSHASWRITEVERF(nmp)) {
8235                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8236                         NFSSETWRITEVERF(nmp);
8237                 } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
8238                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8239                         nd->nd_repstat = NFSERR_STALEWRITEVERF;
8240                 }
8241                 NFSUNLOCKMNT(nmp);
8242                 tl += (NFSX_VERF / NFSX_UNSIGNED);
8243                 if (nd->nd_repstat == 0 && *++tl != newnfs_true)
8244                         /* Must be a synchronous copy. */
8245                         nd->nd_repstat = NFSERR_NOTSUPP;
8246                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8247                 error = nfsm_loadattr(nd, outnap);
8248                 if (error == 0)
8249                         *outattrflagp = NFS_LATTR_NOSHRINK;
8250                 if (nd->nd_repstat == 0)
8251                         *lenp = len;
8252         } else if (nd->nd_repstat == NFSERR_OFFLOADNOREQS) {
8253                 /*
8254                  * For the case where consecutive is not supported, but
8255                  * synchronous is supported, we can try consecutive == false
8256                  * by returning this error.  Otherwise, return NFSERR_NOTSUPP,
8257                  * since Copy cannot be done.
8258                  */
8259                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8260                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8261                         if (!consecutive || *++tl == newnfs_false)
8262                                 nd->nd_repstat = NFSERR_NOTSUPP;
8263                 } else
8264                         nd->nd_repstat = NFSERR_BADXDR;
8265         }
8266         if (error == 0)
8267                 error = nd->nd_repstat;
8268 nfsmout:
8269         m_freem(nd->nd_mrep);
8270         return (error);
8271 }
8272
8273 /*
8274  * Seek operation.
8275  */
8276 int
8277 nfsrpc_seek(vnode_t vp, off_t *offp, bool *eofp, int content,
8278     struct ucred *cred, struct nfsvattr *nap, int *attrflagp)
8279 {
8280         int error, expireret = 0, retrycnt;
8281         u_int32_t clidrev = 0;
8282         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
8283         struct nfsnode *np = VTONFS(vp);
8284         struct nfsfh *nfhp = NULL;
8285         nfsv4stateid_t stateid;
8286         void *lckp;
8287
8288         if (nmp->nm_clp != NULL)
8289                 clidrev = nmp->nm_clp->nfsc_clientidrev;
8290         nfhp = np->n_fhp;
8291         retrycnt = 0;
8292         do {
8293                 lckp = NULL;
8294                 nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
8295                     NFSV4OPEN_ACCESSREAD, 0, cred, curthread, &stateid, &lckp);
8296                 error = nfsrpc_seekrpc(vp, offp, &stateid, eofp, content,
8297                     nap, attrflagp, cred);
8298                 if (error == NFSERR_STALESTATEID)
8299                         nfscl_initiate_recovery(nmp->nm_clp);
8300                 if (lckp != NULL)
8301                         nfscl_lockderef(lckp);
8302                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8303                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8304                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8305                         (void) nfs_catnap(PZERO, error, "nfs_seek");
8306                 } else if ((error == NFSERR_EXPIRED ||
8307                     error == NFSERR_BADSTATEID) && clidrev != 0) {
8308                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8309                             curthread);
8310                 }
8311                 retrycnt++;
8312         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8313             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8314             error == NFSERR_BADSESSION ||
8315             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8316             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8317              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
8318             (error == NFSERR_OPENMODE && retrycnt < 4));
8319         if (error && retrycnt >= 4)
8320                 error = EIO;
8321         return (error);
8322 }
8323
8324 /*
8325  * The seek RPC.
8326  */
8327 static int
8328 nfsrpc_seekrpc(vnode_t vp, off_t *offp, nfsv4stateid_t *stateidp, bool *eofp,
8329     int content, struct nfsvattr *nap, int *attrflagp, struct ucred *cred)
8330 {
8331         uint32_t *tl;
8332         int error;
8333         struct nfsrv_descript nfsd;
8334         struct nfsrv_descript *nd = &nfsd;
8335         nfsattrbit_t attrbits;
8336
8337         *attrflagp = 0;
8338         NFSCL_REQSTART(nd, NFSPROC_SEEK, vp);
8339         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
8340         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8341         txdr_hyper(*offp, tl); tl += 2;
8342         *tl++ = txdr_unsigned(content);
8343         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8344         NFSGETATTR_ATTRBIT(&attrbits);
8345         nfsrv_putattrbit(nd, &attrbits);
8346         error = nfscl_request(nd, vp, curthread, cred, NULL);
8347         if (error != 0)
8348                 return (error);
8349         if (nd->nd_repstat == 0) {
8350                 NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER);
8351                 if (*tl++ == newnfs_true)
8352                         *eofp = true;
8353                 else
8354                         *eofp = false;
8355                 *offp = fxdr_hyper(tl);
8356                 /* Just skip over Getattr op status. */
8357                 error = nfsm_loadattr(nd, nap);
8358                 if (error == 0)
8359                         *attrflagp = 1;
8360         }
8361         error = nd->nd_repstat;
8362 nfsmout:
8363         m_freem(nd->nd_mrep);
8364         return (error);
8365 }
8366
8367 /*
8368  * The getextattr RPC.
8369  */
8370 int
8371 nfsrpc_getextattr(vnode_t vp, const char *name, struct uio *uiop, ssize_t *lenp,
8372     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8373 {
8374         uint32_t *tl;
8375         int error;
8376         struct nfsrv_descript nfsd;
8377         struct nfsrv_descript *nd = &nfsd;
8378         nfsattrbit_t attrbits;
8379         uint32_t len, len2;
8380
8381         *attrflagp = 0;
8382         NFSCL_REQSTART(nd, NFSPROC_GETEXTATTR, vp);
8383         nfsm_strtom(nd, name, strlen(name));
8384         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8385         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8386         NFSGETATTR_ATTRBIT(&attrbits);
8387         nfsrv_putattrbit(nd, &attrbits);
8388         error = nfscl_request(nd, vp, p, cred, NULL);
8389         if (error != 0)
8390                 return (error);
8391         if (nd->nd_repstat == 0) {
8392                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8393                 len = fxdr_unsigned(uint32_t, *tl);
8394                 /* Sanity check lengths. */
8395                 if (uiop != NULL && len > 0 && len <= IOSIZE_MAX &&
8396                     uiop->uio_resid <= UINT32_MAX) {
8397                         len2 = uiop->uio_resid;
8398                         if (len2 >= len)
8399                                 error = nfsm_mbufuio(nd, uiop, len);
8400                         else {
8401                                 error = nfsm_mbufuio(nd, uiop, len2);
8402                                 if (error == 0) {
8403                                         /*
8404                                          * nfsm_mbufuio() advances to a multiple
8405                                          * of 4, so round up len2 as well.  Then
8406                                          * we need to advance over the rest of
8407                                          * the data, rounding up the remaining
8408                                          * length.
8409                                          */
8410                                         len2 = NFSM_RNDUP(len2);
8411                                         len2 = NFSM_RNDUP(len - len2);
8412                                         if (len2 > 0)
8413                                                 error = nfsm_advance(nd, len2,
8414                                                     -1);
8415                                 }
8416                         }
8417                 } else if (uiop == NULL && len > 0) {
8418                         /* Just wants the length and not the data. */
8419                         error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8420                 } else if (len > 0)
8421                         error = ENOATTR;
8422                 if (error != 0)
8423                         goto nfsmout;
8424                 *lenp = len;
8425                 /* Just skip over Getattr op status. */
8426                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8427                 error = nfsm_loadattr(nd, nap);
8428                 if (error == 0)
8429                         *attrflagp = 1;
8430         }
8431         if (error == 0)
8432                 error = nd->nd_repstat;
8433 nfsmout:
8434         m_freem(nd->nd_mrep);
8435         return (error);
8436 }
8437
8438 /*
8439  * The setextattr RPC.
8440  */
8441 int
8442 nfsrpc_setextattr(vnode_t vp, const char *name, struct uio *uiop,
8443     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8444 {
8445         uint32_t *tl;
8446         int error;
8447         struct nfsrv_descript nfsd;
8448         struct nfsrv_descript *nd = &nfsd;
8449         nfsattrbit_t attrbits;
8450
8451         *attrflagp = 0;
8452         NFSCL_REQSTART(nd, NFSPROC_SETEXTATTR, vp);
8453         if (uiop->uio_resid > nd->nd_maxreq) {
8454                 /* nd_maxreq is set by NFSCL_REQSTART(). */
8455                 m_freem(nd->nd_mreq);
8456                 return (EINVAL);
8457         }
8458         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8459         *tl = txdr_unsigned(NFSV4SXATTR_EITHER);
8460         nfsm_strtom(nd, name, strlen(name));
8461         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8462         *tl = txdr_unsigned(uiop->uio_resid);
8463         nfsm_uiombuf(nd, uiop, uiop->uio_resid);
8464         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8465         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8466         NFSGETATTR_ATTRBIT(&attrbits);
8467         nfsrv_putattrbit(nd, &attrbits);
8468         error = nfscl_request(nd, vp, p, cred, NULL);
8469         if (error != 0)
8470                 return (error);
8471         if (nd->nd_repstat == 0) {
8472                 /* Just skip over the reply and Getattr op status. */
8473                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8474                     NFSX_UNSIGNED);
8475                 error = nfsm_loadattr(nd, nap);
8476                 if (error == 0)
8477                         *attrflagp = 1;
8478         }
8479         if (error == 0)
8480                 error = nd->nd_repstat;
8481 nfsmout:
8482         m_freem(nd->nd_mrep);
8483         return (error);
8484 }
8485
8486 /*
8487  * The removeextattr RPC.
8488  */
8489 int
8490 nfsrpc_rmextattr(vnode_t vp, const char *name, struct nfsvattr *nap,
8491     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8492 {
8493         uint32_t *tl;
8494         int error;
8495         struct nfsrv_descript nfsd;
8496         struct nfsrv_descript *nd = &nfsd;
8497         nfsattrbit_t attrbits;
8498
8499         *attrflagp = 0;
8500         NFSCL_REQSTART(nd, NFSPROC_RMEXTATTR, vp);
8501         nfsm_strtom(nd, name, strlen(name));
8502         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8503         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8504         NFSGETATTR_ATTRBIT(&attrbits);
8505         nfsrv_putattrbit(nd, &attrbits);
8506         error = nfscl_request(nd, vp, p, cred, NULL);
8507         if (error != 0)
8508                 return (error);
8509         if (nd->nd_repstat == 0) {
8510                 /* Just skip over the reply and Getattr op status. */
8511                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8512                     NFSX_UNSIGNED);
8513                 error = nfsm_loadattr(nd, nap);
8514                 if (error == 0)
8515                         *attrflagp = 1;
8516         }
8517         if (error == 0)
8518                 error = nd->nd_repstat;
8519 nfsmout:
8520         m_freem(nd->nd_mrep);
8521         return (error);
8522 }
8523
8524 /*
8525  * The listextattr RPC.
8526  */
8527 int
8528 nfsrpc_listextattr(vnode_t vp, uint64_t *cookiep, struct uio *uiop,
8529     size_t *lenp, bool *eofp, struct nfsvattr *nap, int *attrflagp,
8530     struct ucred *cred, NFSPROC_T *p)
8531 {
8532         uint32_t *tl;
8533         int cnt, error, i, len;
8534         struct nfsrv_descript nfsd;
8535         struct nfsrv_descript *nd = &nfsd;
8536         nfsattrbit_t attrbits;
8537         u_char c;
8538
8539         *attrflagp = 0;
8540         NFSCL_REQSTART(nd, NFSPROC_LISTEXTATTR, vp);
8541         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8542         txdr_hyper(*cookiep, tl); tl += 2;
8543         *tl++ = txdr_unsigned(*lenp);
8544         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8545         NFSGETATTR_ATTRBIT(&attrbits);
8546         nfsrv_putattrbit(nd, &attrbits);
8547         error = nfscl_request(nd, vp, p, cred, NULL);
8548         if (error != 0)
8549                 return (error);
8550         *eofp = true;
8551         *lenp = 0;
8552         if (nd->nd_repstat == 0) {
8553                 NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
8554                 *cookiep = fxdr_hyper(tl); tl += 2;
8555                 cnt = fxdr_unsigned(int, *tl);
8556                 if (cnt < 0) {
8557                         error = EBADRPC;
8558                         goto nfsmout;
8559                 }
8560                 for (i = 0; i < cnt; i++) {
8561                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8562                         len = fxdr_unsigned(int, *tl);
8563                         if (len <= 0 || len > EXTATTR_MAXNAMELEN) {
8564                                 error = EBADRPC;
8565                                 goto nfsmout;
8566                         }
8567                         if (uiop == NULL)
8568                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8569                         else if (uiop->uio_resid >= len + 1) {
8570                                 c = len;
8571                                 error = uiomove(&c, sizeof(c), uiop);
8572                                 if (error == 0)
8573                                         error = nfsm_mbufuio(nd, uiop, len);
8574                         } else {
8575                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8576                                 *eofp = false;
8577                         }
8578                         if (error != 0)
8579                                 goto nfsmout;
8580                         *lenp += (len + 1);
8581                 }
8582                 /* Get the eof and skip over the Getattr op status. */
8583                 NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
8584                 /*
8585                  * *eofp is set false above, because it wasn't able to copy
8586                  * all of the reply.
8587                  */
8588                 if (*eofp && *tl == 0)
8589                         *eofp = false;
8590                 error = nfsm_loadattr(nd, nap);
8591                 if (error == 0)
8592                         *attrflagp = 1;
8593         }
8594         if (error == 0)
8595                 error = nd->nd_repstat;
8596 nfsmout:
8597         m_freem(nd->nd_mrep);
8598         return (error);
8599 }
8600
8601 /*
8602  * Split an mbuf list.  For non-M_EXTPG mbufs, just use m_split().
8603  */
8604 static struct mbuf *
8605 nfsm_split(struct mbuf *mp, uint64_t xfer)
8606 {
8607         struct mbuf *m, *m2;
8608         vm_page_t pg;
8609         int i, j, left, pgno, plen, trim;
8610         char *cp, *cp2;
8611
8612         if ((mp->m_flags & M_EXTPG) == 0) {
8613                 m = m_split(mp, xfer, M_WAITOK);
8614                 return (m);
8615         }
8616
8617         /* Find the correct mbuf to split at. */
8618         for (m = mp; m != NULL && xfer > m->m_len; m = m->m_next)
8619                 xfer -= m->m_len;
8620         if (m == NULL)
8621                 return (NULL);
8622
8623         /* If xfer == m->m_len, we can just split the mbuf list. */
8624         if (xfer == m->m_len) {
8625                 m2 = m->m_next;
8626                 m->m_next = NULL;
8627                 return (m2);
8628         }
8629
8630         /* Find the page to split at. */
8631         pgno = 0;
8632         left = xfer;
8633         do {
8634                 if (pgno == 0)
8635                         plen = m_epg_pagelen(m, 0, m->m_epg_1st_off);
8636                 else
8637                         plen = m_epg_pagelen(m, pgno, 0);
8638                 if (left <= plen)
8639                         break;
8640                 left -= plen;
8641                 pgno++;
8642         } while (pgno < m->m_epg_npgs);
8643         if (pgno == m->m_epg_npgs)
8644                 panic("nfsm_split: eroneous ext_pgs mbuf");
8645
8646         m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs);
8647         m2->m_epg_flags |= EPG_FLAG_ANON;
8648
8649         /*
8650          * If left < plen, allocate a new page for the new mbuf
8651          * and copy the data after left in the page to this new
8652          * page.
8653          */
8654         if (left < plen) {
8655                 do {
8656                         pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
8657                             VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP |
8658                             VM_ALLOC_WIRED);
8659                         if (pg == NULL)
8660                                 vm_wait(NULL);
8661                 } while (pg == NULL);
8662                 m2->m_epg_pa[0] = VM_PAGE_TO_PHYS(pg);
8663                 m2->m_epg_npgs = 1;
8664
8665                 /* Copy the data after left to the new page. */
8666                 trim = plen - left;
8667                 cp = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
8668                 if (pgno == 0)
8669                         cp += m->m_epg_1st_off;
8670                 cp += left;
8671                 cp2 = (char *)(void *)PHYS_TO_DMAP(m2->m_epg_pa[0]);
8672                 if (pgno == m->m_epg_npgs - 1)
8673                         m2->m_epg_last_len = trim;
8674                 else {
8675                         cp2 += PAGE_SIZE - trim;
8676                         m2->m_epg_1st_off = PAGE_SIZE - trim;
8677                         m2->m_epg_last_len = m->m_epg_last_len;
8678                 }
8679                 memcpy(cp2, cp, trim);
8680                 m2->m_len = trim;
8681         } else {
8682                 m2->m_len = 0;
8683                 m2->m_epg_last_len = m->m_epg_last_len;
8684         }
8685
8686         /* Move the pages beyond pgno to the new mbuf. */
8687         for (i = pgno + 1, j = m2->m_epg_npgs; i < m->m_epg_npgs; i++, j++) {
8688                 m2->m_epg_pa[j] = m->m_epg_pa[i];
8689                 /* Never moves page 0. */
8690                 m2->m_len += m_epg_pagelen(m, i, 0);
8691         }
8692         m2->m_epg_npgs = j;
8693         m->m_epg_npgs = pgno + 1;
8694         m->m_epg_last_len = left;
8695         m->m_len = xfer;
8696
8697         m2->m_next = m->m_next;
8698         m->m_next = NULL;
8699         return (m2);
8700 }