CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/nfsclient/nfs_clrpcops.c
nfsclient: Propagate copyin() errors from nfsm_uiombuf()
[FreeBSD/FreeBSD.git] / sys / fs / nfsclient / nfs_clrpcops.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35
36 #include <sys/cdefs.h>
37 /*
38  * Rpc op calls, generally called from the vnode op calls or through the
39  * buffer cache, for NFS v2, 3 and 4.
40  * These do not normally make any changes to vnode arguments or use
41  * structures that might change between the VFS variants. The returned
42  * arguments are all at the end, after the NFSPROC_T *p one.
43  */
44
45 #include "opt_inet6.h"
46
47 #include <fs/nfs/nfsport.h>
48 #include <fs/nfsclient/nfs.h>
49 #include <sys/extattr.h>
50 #include <sys/sysctl.h>
51 #include <sys/taskqueue.h>
52
53 SYSCTL_DECL(_vfs_nfs);
54
55 static int      nfsignore_eexist = 0;	/* Exported read-write as ignore_eexist under _vfs_nfs. */
56 SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
57     &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
58
59 static int      nfscl_dssameconn = 0;	/* Exported read-write as dssameconn under _vfs_nfs. */
60 SYSCTL_INT(_vfs_nfs, OID_AUTO, dssameconn, CTLFLAG_RW,
61     &nfscl_dssameconn, 0, "Use same TCP connection to multiple DSs");
62
63 static uint64_t nfs_maxcopyrange = SSIZE_MAX;	/* Exported read-write as maxcopyrange; initial value SSIZE_MAX. */
64 SYSCTL_U64(_vfs_nfs, OID_AUTO, maxcopyrange, CTLFLAG_RW,
65     &nfs_maxcopyrange, 0, "Max size of a Copy so RPC times reasonable");
66
67 /*
68  * Global variables: extern declarations first, then the ones defined here.
69  */
70 extern struct nfsstatsv1 nfsstatsv1;
71 extern int nfs_numnfscbd;	/* nfsrpc_open() does not try to get a layout while this is 0. */
72 extern struct timeval nfsboottime;
73 extern u_int32_t newnfs_false, newnfs_true;
74 extern nfstype nfsv34_type[9];
75 extern int nfsrv_useacl;
76 extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
77 extern int nfscl_debuglevel;
78 extern int nfs_pnfsiothreads;
79 extern u_long sb_max_adj;
80 NFSCLSTATEMUTEX;
81 int nfstest_outofseq = 0;
82 int nfscl_assumeposixlocks = 1;	/* Non-zero: nfsrpc_openrpc() sets nfso_posixlock even without NFSV4OPEN_LOCKTYPEPOSIX. */
83 int nfscl_enablecallb = 0;	/* nfsrpc_open() does not try to get a layout while this is 0. */
84 short nfsv4_cbport = NFSV4_CBPORT;
85 int nfstest_openallsetattr = 0;
86
87 #define DIRHDSIZ        offsetof(struct dirent, d_name)
88
/*
 * Possible results of nfscl_getsameserver().  The enumerators are numbered
 * 0, 1, 2 in declaration order.
 */
enum nfsclds_state {
        /* Use this session for the DS. */
        NFSDSP_USETHISSESSION = 0,
        /* Use the nfsclds_sequence field of this dsp for new session. */
        NFSDSP_SEQTHISSESSION,
        /* No matching server was found. */
        NFSDSP_NOTFOUND
};
101
102 /*
103  * Do a write RPC on a DS data file, using this structure for the arguments,
104  * so that this function can be executed by a separate kernel process.
105  */
106 struct nfsclwritedsdorpc {
107         int                     done;
108         int                     inprog;
109         struct task             tsk;
110         struct vnode            *vp;
111         int                     iomode;
112         int                     must_commit;
113         nfsv4stateid_t          *stateidp;
114         struct nfsclds          *dsp;
115         uint64_t                off;
116         int                     len;
117 #ifdef notyet
118         int                     advise;
119 #endif
120         struct nfsfh            *fhp;
121         struct mbuf             *m;
122         int                     vers;
123         int                     minorvers;
124         struct ucred            *cred;
125         NFSPROC_T               *p;
126         int                     err;
127 };
128
129 static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
130     struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
131 static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
132     nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *);
133 static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
134     struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
135     int);
136 static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
137     nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
138     struct nfsvattr *, struct nfsfh **, int *, int *, void *);
139 static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
140     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
141     NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
142     int *, void *, int *);
143 static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
144     struct nfscllockowner *, u_int64_t, u_int64_t,
145     u_int32_t, struct ucred *, NFSPROC_T *, int);
146 static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
147     struct acl *, nfsv4stateid_t *, void *);
148 static int nfsrpc_layouterror(struct nfsmount *, uint8_t *, int, uint64_t,
149     uint64_t, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, uint32_t,
150     uint32_t, char *);
151 static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
152     uint32_t, uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
153     struct ucred *, NFSPROC_T *);
154 static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
155     struct sockaddr_in6 *, sa_family_t, int, int, struct nfsclds **,
156     NFSPROC_T *);
157 static void nfscl_initsessionslots(struct nfsclsession *);
158 static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
159     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
160     struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
161     NFSPROC_T *);
162 static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
163     nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
164     struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
165     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
166 static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
167     struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
168     struct ucred *, NFSPROC_T *);
169 static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
170     nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
171     struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
172 static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
173     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
174     struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
175 static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
176     struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
177     struct ucred *, NFSPROC_T *);
178 static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
179     struct nfsclds *, struct nfsclds **, uint32_t *);
180 static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
181     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
182     NFSPROC_T *);
183 static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
184     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
185 #ifdef notyet
186 static int nfsio_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
187     struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
188     NFSPROC_T *);
189 static int nfsrpc_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
190     struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
191 #endif
192 static int nfsrpc_allocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
193     struct nfsvattr *, int *, struct ucred *, NFSPROC_T *, void *);
194 static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
195     uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
196 static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
197     NFSPROC_T *);
198 static int nfsrv_parselayoutget(struct nfsmount *, struct nfsrv_descript *,
199     nfsv4stateid_t *, int *, struct nfsclflayouthead *);
200 static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
201     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
202     struct nfscldeleg **, struct ucred *, NFSPROC_T *);
203 static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
204     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
205     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
206     struct nfsfh **, int *, int *, void *, int *);
207 static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
208     int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
209     struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
210     struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
211 static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
212     nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
213     struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
214     struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *,
215     int, int, int, int *, struct nfsclflayouthead *, int *);
216 static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
217     uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
218     struct nfsclflayouthead *, struct ucred *, NFSPROC_T *, void *);
219 static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
220     int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
221     struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
222 static int nfsrpc_copyrpc(vnode_t, off_t, vnode_t, off_t, size_t *,
223     nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *,
224     struct nfsvattr *, int *, bool, int *, struct ucred *, NFSPROC_T *);
225 static int nfsrpc_seekrpc(vnode_t, off_t *, nfsv4stateid_t *, bool *,
226     int, struct nfsvattr *, int *, struct ucred *);
227 static struct mbuf *nfsm_split(struct mbuf *, uint64_t);
228 static void nfscl_statfs(struct vnode *, struct ucred *, NFSPROC_T *);
229
230 int nfs_pnfsio(task_fn_t *, void *);
231
232 /*
233  * nfs null call from vfs.
234  */
235 int
236 nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
237 {
238         int error;
239         struct nfsrv_descript nfsd, *nd = &nfsd;
240
241         NFSCL_REQSTART(nd, NFSPROC_NULL, vp, NULL);
242         error = nfscl_request(nd, vp, p, cred, NULL);
243         if (nd->nd_repstat && !error)
244                 error = nd->nd_repstat;
245         m_freem(nd->nd_mrep);
246         return (error);
247 }
248
249 /*
250  * nfs access rpc op.
251  * For nfs version 3 and 4, use the access rpc to check accessibility. If file
252  * modes are changed on the server, accesses might still fail later.
253  */
254 int
255 nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
256     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
257 {
258         int error;
259         u_int32_t mode, rmode;
260
261         if (acmode & VREAD)
262                 mode = NFSACCESS_READ;
263         else
264                 mode = 0;
265         if (vnode_vtype(vp) == VDIR) {
266                 if (acmode & VWRITE)
267                         mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
268                                  NFSACCESS_DELETE);
269                 if (acmode & VEXEC)
270                         mode |= NFSACCESS_LOOKUP;
271         } else {
272                 if (acmode & VWRITE)
273                         mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
274                 if (acmode & VEXEC)
275                         mode |= NFSACCESS_EXECUTE;
276         }
277
278         /*
279          * Now, just call nfsrpc_accessrpc() to do the actual RPC.
280          */
281         error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode,
282             NULL);
283
284         /*
285          * The NFS V3 spec does not clarify whether or not
286          * the returned access bits can be a superset of
287          * the ones requested, so...
288          */
289         if (!error && (rmode & mode) != mode)
290                 error = EACCES;
291         return (error);
292 }
293
294 /*
295  * The actual rpc, separated out for Darwin.
296  */
297 int
298 nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
299     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep,
300     void *stuff)
301 {
302         u_int32_t *tl;
303         u_int32_t supported, rmode;
304         int error;
305         struct nfsrv_descript nfsd, *nd = &nfsd;
306         nfsattrbit_t attrbits;
307         struct nfsmount *nmp;
308         struct nfsnode *np;
309
310         *attrflagp = 0;
311         supported = mode;
312         nmp = VFSTONFS(vp->v_mount);
313         np = VTONFS(vp);
314         if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
315             nmp->nm_fhsize == 0) {
316                 /* Attempt to get the actual root file handle. */
317                 error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, p);
318                 if (error != 0)
319                         return (EACCES);
320                 if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
321                         nfscl_statfs(vp, cred, p);
322         }
323         NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp, cred);
324         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
325         *tl = txdr_unsigned(mode);
326         if (nd->nd_flag & ND_NFSV4) {
327                 /*
328                  * And do a Getattr op.
329                  */
330                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
331                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
332                 NFSGETATTR_ATTRBIT(&attrbits);
333                 (void) nfsrv_putattrbit(nd, &attrbits);
334         }
335         error = nfscl_request(nd, vp, p, cred, stuff);
336         if (error)
337                 return (error);
338         if (nd->nd_flag & ND_NFSV3) {
339                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
340                 if (error)
341                         goto nfsmout;
342         }
343         if (!nd->nd_repstat) {
344                 if (nd->nd_flag & ND_NFSV4) {
345                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
346                         supported = fxdr_unsigned(u_int32_t, *tl++);
347                 } else {
348                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
349                 }
350                 rmode = fxdr_unsigned(u_int32_t, *tl);
351                 if (nd->nd_flag & ND_NFSV4)
352                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
353
354                 /*
355                  * It's not obvious what should be done about
356                  * unsupported access modes. For now, be paranoid
357                  * and clear the unsupported ones.
358                  */
359                 rmode &= supported;
360                 *rmodep = rmode;
361         } else
362                 error = nd->nd_repstat;
363 nfsmout:
364         m_freem(nd->nd_mrep);
365         return (error);
366 }
367
368 /*
369  * nfs open rpc
370  */
371 int
372 nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
373 {
374         struct nfsclopen *op;
375         struct nfscldeleg *dp;
376         struct nfsfh *nfhp;
377         struct nfsnode *np = VTONFS(vp);
378         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
379         u_int32_t mode, clidrev;
380         int ret, newone, error, expireret = 0, retrycnt;
381
382         /*
383          * For NFSv4, Open Ops are only done on Regular Files.
384          */
385         if (vnode_vtype(vp) != VREG)
386                 return (0);
387         mode = 0;
388         if (amode & FREAD)
389                 mode |= NFSV4OPEN_ACCESSREAD;
390         if (amode & FWRITE)
391                 mode |= NFSV4OPEN_ACCESSWRITE;
392         nfhp = np->n_fhp;
393
394         retrycnt = 0;
395 #ifdef notdef
396 { char name[100]; int namel;
397 namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
398 bcopy(NFS4NODENAME(np->n_v4), name, namel);
399 name[namel] = '\0';
400 printf("rpcopen p=0x%x name=%s",p->p_pid,name);
401 if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
402 else printf(" fhl=0\n");
403 }
404 #endif
405         do {
406             dp = NULL;
407             error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
408                 cred, p, NULL, &op, &newone, &ret, 1, true);
409             if (error) {
410                 return (error);
411             }
412             if (nmp->nm_clp != NULL)
413                 clidrev = nmp->nm_clp->nfsc_clientidrev;
414             else
415                 clidrev = 0;
416             if (ret == NFSCLOPEN_DOOPEN) {
417                 if (np->n_v4 != NULL) {
418                         /*
419                          * For the first attempt, try and get a layout, if
420                          * pNFS is enabled for the mount.
421                          */
422                         if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
423                             nfs_numnfscbd == 0 ||
424                             (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
425                                 error = nfsrpc_openrpc(nmp, vp,
426                                     np->n_v4->n4_data,
427                                     np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
428                                     np->n_fhp->nfh_len, mode, op,
429                                     NFS4NODENAME(np->n_v4),
430                                     np->n_v4->n4_namelen,
431                                     &dp, 0, 0x0, cred, p, 0, 0);
432                         else
433                                 error = nfsrpc_getopenlayout(nmp, vp,
434                                     np->n_v4->n4_data,
435                                     np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
436                                     np->n_fhp->nfh_len, mode, op,
437                                     NFS4NODENAME(np->n_v4),
438                                     np->n_v4->n4_namelen, &dp, cred, p);
439                         if (dp != NULL) {
440 #ifdef APPLE
441                                 OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag);
442 #else
443                                 NFSLOCKNODE(np);
444                                 np->n_flag &= ~NDELEGMOD;
445                                 /*
446                                  * Invalidate the attribute cache, so that
447                                  * attributes that pre-date the issue of a
448                                  * delegation are not cached, since the
449                                  * cached attributes will remain valid while
450                                  * the delegation is held.
451                                  */
452                                 NFSINVALATTRCACHE(np);
453                                 NFSUNLOCKNODE(np);
454 #endif
455                                 (void) nfscl_deleg(nmp->nm_mountp,
456                                     op->nfso_own->nfsow_clp,
457                                     nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
458                         }
459                 } else {
460                         error = EIO;
461                 }
462                 newnfs_copyincred(cred, &op->nfso_cred);
463             } else if (ret == NFSCLOPEN_SETCRED)
464                 /*
465                  * This is a new local open on a delegation. It needs
466                  * to have credentials so that an open can be done
467                  * against the server during recovery.
468                  */
469                 newnfs_copyincred(cred, &op->nfso_cred);
470
471             /*
472              * nfso_opencnt is the count of how many VOP_OPEN()s have
473              * been done on this Open successfully and a VOP_CLOSE()
474              * is expected for each of these.
475              * If error is non-zero, don't increment it, since the Open
476              * hasn't succeeded yet.
477              */
478             if (!error) {
479                 op->nfso_opencnt++;
480                 if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) {
481                     NFSLOCKNODE(np);
482                     np->n_openstateid = op;
483                     NFSUNLOCKNODE(np);
484                 }
485             }
486             nfscl_openrelease(nmp, op, error, newone);
487             if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
488                 error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
489                 error == NFSERR_BADSESSION) {
490                 (void) nfs_catnap(PZERO, error, "nfs_open");
491             } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
492                 && clidrev != 0) {
493                 expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
494                 retrycnt++;
495             }
496         } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
497             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
498             error == NFSERR_BADSESSION ||
499             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
500              expireret == 0 && clidrev != 0 && retrycnt < 4));
501         if (error && retrycnt >= 4)
502                 error = EIO;
503         return (error);
504 }
505
506 /*
507  * the actual open rpc
508  */
509 int
510 nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
511     u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
512     u_int8_t *name, int namelen, struct nfscldeleg **dpp,
513     int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
514     int syscred, int recursed)
515 {
516         u_int32_t *tl;
517         struct nfsrv_descript nfsd, *nd = &nfsd;
518         struct nfscldeleg *dp, *ndp = NULL;
519         struct nfsvattr nfsva;
520         u_int32_t rflags, deleg;
521         nfsattrbit_t attrbits;
522         int error, ret, acesize, limitby;
523         struct nfsclsession *tsep;
524
525         dp = *dpp;
526         *dpp = NULL;
527         nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
528             cred);
529         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
530         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
531         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
532         *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
533         tsep = nfsmnt_mdssession(nmp);
534         *tl++ = tsep->nfsess_clientid.lval[0];
535         *tl = tsep->nfsess_clientid.lval[1];
536         (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
537         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
538         *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
539         if (reclaim) {
540                 *tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
541                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
542                 *tl = txdr_unsigned(delegtype);
543         } else {
544                 if (dp != NULL) {
545                         *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
546                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
547                         if (NFSHASNFSV4N(nmp))
548                                 *tl++ = 0;
549                         else
550                                 *tl++ = dp->nfsdl_stateid.seqid;
551                         *tl++ = dp->nfsdl_stateid.other[0];
552                         *tl++ = dp->nfsdl_stateid.other[1];
553                         *tl = dp->nfsdl_stateid.other[2];
554                 } else {
555                         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
556                 }
557                 (void) nfsm_strtom(nd, name, namelen);
558         }
559         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
560         *tl = txdr_unsigned(NFSV4OP_GETATTR);
561         NFSZERO_ATTRBIT(&attrbits);
562         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
563         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
564         (void) nfsrv_putattrbit(nd, &attrbits);
565         if (syscred)
566                 nd->nd_flag |= ND_USEGSSNAME;
567         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
568             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
569         if (error)
570                 return (error);
571         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
572         if (nd->nd_repstat == 0 || (nd->nd_repstat == NFSERR_DELAY &&
573             reclaim != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0)) {
574                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
575                     6 * NFSX_UNSIGNED);
576                 op->nfso_stateid.seqid = *tl++;
577                 op->nfso_stateid.other[0] = *tl++;
578                 op->nfso_stateid.other[1] = *tl++;
579                 op->nfso_stateid.other[2] = *tl;
580                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
581                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
582                 if (error)
583                         goto nfsmout;
584                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
585                 deleg = fxdr_unsigned(u_int32_t, *tl);
586                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
587                     deleg == NFSV4OPEN_DELEGATEWRITE) {
588                         if (!(op->nfso_own->nfsow_clp->nfsc_flags &
589                               NFSCLFLAGS_FIRSTDELEG))
590                                 op->nfso_own->nfsow_clp->nfsc_flags |=
591                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
592                         ndp = malloc(
593                             sizeof (struct nfscldeleg) + newfhlen,
594                             M_NFSCLDELEG, M_WAITOK);
595                         LIST_INIT(&ndp->nfsdl_owner);
596                         LIST_INIT(&ndp->nfsdl_lock);
597                         ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
598                         ndp->nfsdl_fhlen = newfhlen;
599                         NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
600                         newnfs_copyincred(cred, &ndp->nfsdl_cred);
601                         nfscl_lockinit(&ndp->nfsdl_rwlock);
602                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
603                             NFSX_UNSIGNED);
604                         ndp->nfsdl_stateid.seqid = *tl++;
605                         ndp->nfsdl_stateid.other[0] = *tl++;
606                         ndp->nfsdl_stateid.other[1] = *tl++;
607                         ndp->nfsdl_stateid.other[2] = *tl++;
608                         ret = fxdr_unsigned(int, *tl);
609                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
610                                 ndp->nfsdl_flags = NFSCLDL_WRITE;
611                                 /*
612                                  * Indicates how much the file can grow.
613                                  */
614                                 NFSM_DISSECT(tl, u_int32_t *,
615                                     3 * NFSX_UNSIGNED);
616                                 limitby = fxdr_unsigned(int, *tl++);
617                                 switch (limitby) {
618                                 case NFSV4OPEN_LIMITSIZE:
619                                         ndp->nfsdl_sizelimit = fxdr_hyper(tl);
620                                         break;
621                                 case NFSV4OPEN_LIMITBLOCKS:
622                                         ndp->nfsdl_sizelimit =
623                                             fxdr_unsigned(u_int64_t, *tl++);
624                                         ndp->nfsdl_sizelimit *=
625                                             fxdr_unsigned(u_int64_t, *tl);
626                                         break;
627                                 default:
628                                         error = NFSERR_BADXDR;
629                                         goto nfsmout;
630                                 }
631                         } else {
632                                 ndp->nfsdl_flags = NFSCLDL_READ;
633                         }
634                         if (ret)
635                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
636                         error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, false,
637                             &ret, &acesize, p);
638                         if (error)
639                                 goto nfsmout;
640                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
641                         error = NFSERR_BADXDR;
642                         goto nfsmout;
643                 }
644                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
645                 /* If the 2nd element == NFS_OK, the Getattr succeeded. */
646                 if (*++tl == 0) {
647                         KASSERT(nd->nd_repstat == 0,
648                             ("nfsrpc_openrpc: Getattr repstat"));
649                         error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
650                             NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
651                             NULL, NULL, NULL, p, cred);
652                         if (error)
653                                 goto nfsmout;
654                 }
655                 if (ndp != NULL) {
656                         if (reclaim != 0 && dp != NULL) {
657                                 ndp->nfsdl_change = dp->nfsdl_change;
658                                 ndp->nfsdl_modtime = dp->nfsdl_modtime;
659                                 ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
660                         } else if (nd->nd_repstat == 0) {
661                                 ndp->nfsdl_change = nfsva.na_filerev;
662                                 ndp->nfsdl_modtime = nfsva.na_mtime;
663                                 ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
664                         } else
665                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
666                 }
667                 nd->nd_repstat = 0;
668                 if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
669                     do {
670                         ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
671                             cred, p);
672                         if (ret == NFSERR_DELAY)
673                             (void) nfs_catnap(PZERO, ret, "nfs_open");
674                     } while (ret == NFSERR_DELAY);
675                     error = ret;
676                 }
677                 if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
678                     nfscl_assumeposixlocks)
679                     op->nfso_posixlock = 1;
680                 else
681                     op->nfso_posixlock = 0;
682
683                 /*
684                  * If the server is handing out delegations, but we didn't
685                  * get one because an OpenConfirm was required, try the
686                  * Open again, to get a delegation. This is a harmless no-op,
687                  * from a server's point of view.
688                  */
689                 if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
690                     (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
691                     && !error && dp == NULL && ndp == NULL && !recursed) {
692                     do {
693                         ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
694                             newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
695                             cred, p, syscred, 1);
696                         if (ret == NFSERR_DELAY)
697                             (void) nfs_catnap(PZERO, ret, "nfs_open2");
698                     } while (ret == NFSERR_DELAY);
699                     if (ret) {
700                         if (ndp != NULL) {
701                                 free(ndp, M_NFSCLDELEG);
702                                 ndp = NULL;
703                         }
704                         if (ret == NFSERR_STALECLIENTID ||
705                             ret == NFSERR_STALEDONTRECOVER ||
706                             ret == NFSERR_BADSESSION)
707                                 error = ret;
708                     }
709                 }
710         }
711         if (nd->nd_repstat != 0 && error == 0)
712                 error = nd->nd_repstat;
713         if (error == NFSERR_STALECLIENTID)
714                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
715 nfsmout:
716         if (!error)
717                 *dpp = ndp;
718         else if (ndp != NULL)
719                 free(ndp, M_NFSCLDELEG);
720         m_freem(nd->nd_mrep);
721         return (error);
722 }
723
724 /*
725  * open downgrade rpc
726  */
727 int
728 nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
729     struct ucred *cred, NFSPROC_T *p)
730 {
731         u_int32_t *tl;
732         struct nfsrv_descript nfsd, *nd = &nfsd;
733         int error;
734
735         NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp, cred);
736         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
737         if (NFSHASNFSV4N(VFSTONFS(vp->v_mount)))
738                 *tl++ = 0;
739         else
740                 *tl++ = op->nfso_stateid.seqid;
741         *tl++ = op->nfso_stateid.other[0];
742         *tl++ = op->nfso_stateid.other[1];
743         *tl++ = op->nfso_stateid.other[2];
744         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
745         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
746         *tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
747         error = nfscl_request(nd, vp, p, cred, NULL);
748         if (error)
749                 return (error);
750         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
751         if (!nd->nd_repstat) {
752                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
753                 op->nfso_stateid.seqid = *tl++;
754                 op->nfso_stateid.other[0] = *tl++;
755                 op->nfso_stateid.other[1] = *tl++;
756                 op->nfso_stateid.other[2] = *tl;
757         }
758         if (nd->nd_repstat && error == 0)
759                 error = nd->nd_repstat;
760         if (error == NFSERR_STALESTATEID)
761                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
762 nfsmout:
763         m_freem(nd->nd_mrep);
764         return (error);
765 }
766
767 /*
768  * V4 Close operation.
769  */
770 int
771 nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
772 {
773         struct nfsclclient *clp;
774         int error;
775
776         if (vnode_vtype(vp) != VREG)
777                 return (0);
778         if (doclose)
779                 error = nfscl_doclose(vp, &clp, p);
780         else {
781                 error = nfscl_getclose(vp, &clp);
782                 if (error == 0)
783                         nfscl_clientrelease(clp);
784         }
785         return (error);
786 }
787
788 /*
789  * Close the open.
790  */
791 int
792 nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p,
793     bool loop_on_delayed, bool freeop)
794 {
795         struct nfsrv_descript nfsd, *nd = &nfsd;
796         struct nfscllockowner *lp, *nlp;
797         struct nfscllock *lop, *nlop;
798         struct ucred *tcred;
799         u_int64_t off = 0, len = 0;
800         u_int32_t type = NFSV4LOCKT_READ;
801         int error, do_unlock, trycnt;
802
803         tcred = newnfs_getcred();
804         newnfs_copycred(&op->nfso_cred, tcred);
805         /*
806          * (Theoretically this could be done in the same
807          *  compound as the close, but having multiple
808          *  sequenced Ops in the same compound might be
809          *  too scary for some servers.)
810          */
811         if (op->nfso_posixlock) {
812                 off = 0;
813                 len = NFS64BITSSET;
814                 type = NFSV4LOCKT_READ;
815         }
816
817         /*
818          * Since this function is only called from VOP_INACTIVE(), no
819          * other thread will be manipulating this Open. As such, the
820          * lock lists are not being changed by other threads, so it should
821          * be safe to do this without locking.
822          */
823         LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
824                 do_unlock = 1;
825                 LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
826                         if (op->nfso_posixlock == 0) {
827                                 off = lop->nfslo_first;
828                                 len = lop->nfslo_end - lop->nfslo_first;
829                                 if (lop->nfslo_type == F_WRLCK)
830                                         type = NFSV4LOCKT_WRITE;
831                                 else
832                                         type = NFSV4LOCKT_READ;
833                         }
834                         if (do_unlock) {
835                                 trycnt = 0;
836                                 do {
837                                         error = nfsrpc_locku(nd, nmp, lp, off,
838                                             len, type, tcred, p, 0);
839                                         if ((nd->nd_repstat == NFSERR_GRACE ||
840                                             nd->nd_repstat == NFSERR_DELAY) &&
841                                             error == 0)
842                                                 (void) nfs_catnap(PZERO,
843                                                     (int)nd->nd_repstat,
844                                                     "nfs_close");
845                                 } while ((nd->nd_repstat == NFSERR_GRACE ||
846                                     nd->nd_repstat == NFSERR_DELAY) &&
847                                     error == 0 && trycnt++ < 5);
848                                 if (op->nfso_posixlock)
849                                         do_unlock = 0;
850                         }
851                         nfscl_freelock(lop, 0);
852                 }
853                 /*
854                  * Do a ReleaseLockOwner.
855                  * The lock owner name nfsl_owner may be used by other opens for
856                  * other files but the lock_owner4 name that nfsrpc_rellockown()
857                  * puts on the wire has the file handle for this file appended
858                  * to it, so it can be done now.
859                  */
860                 (void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
861                     lp->nfsl_open->nfso_fhlen, tcred, p);
862         }
863
864         /*
865          * There could be other Opens for different files on the same
866          * OpenOwner, so locking is required.
867          */
868         NFSLOCKCLSTATE();
869         nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
870         NFSUNLOCKCLSTATE();
871         do {
872                 error = nfscl_tryclose(op, tcred, nmp, p, loop_on_delayed);
873                 if (error == NFSERR_GRACE)
874                         (void) nfs_catnap(PZERO, error, "nfs_close");
875         } while (error == NFSERR_GRACE);
876         NFSLOCKCLSTATE();
877         nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
878
879         LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
880                 nfscl_freelockowner(lp, 0);
881         if (freeop && error != NFSERR_DELAY)
882                 nfscl_freeopen(op, 0, true);
883         NFSUNLOCKCLSTATE();
884         NFSFREECRED(tcred);
885         return (error);
886 }
887
888 /*
889  * The actual Close RPC.
890  */
891 int
892 nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
893     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
894     int syscred)
895 {
896         u_int32_t *tl;
897         int error;
898
899         nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
900             op->nfso_fhlen, NULL, NULL, 0, 0, cred);
901         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
902         if (NFSHASNFSV4N(nmp)) {
903                 *tl++ = 0;
904                 *tl++ = 0;
905         } else {
906                 *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
907                 *tl++ = op->nfso_stateid.seqid;
908         }
909         *tl++ = op->nfso_stateid.other[0];
910         *tl++ = op->nfso_stateid.other[1];
911         *tl = op->nfso_stateid.other[2];
912         if (syscred)
913                 nd->nd_flag |= ND_USEGSSNAME;
914         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
915             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
916         if (error)
917                 return (error);
918         if (!NFSHASNFSV4N(nmp))
919                 NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
920         if (nd->nd_repstat == 0)
921                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
922         error = nd->nd_repstat;
923         if (!NFSHASNFSV4N(nmp) && error == NFSERR_STALESTATEID)
924                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
925 nfsmout:
926         m_freem(nd->nd_mrep);
927         return (error);
928 }
929
930 /*
931  * V4 Open Confirm RPC.
932  */
933 int
934 nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
935     struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
936 {
937         u_int32_t *tl;
938         struct nfsrv_descript nfsd, *nd = &nfsd;
939         struct nfsmount *nmp;
940         int error;
941
942         nmp = VFSTONFS(vp->v_mount);
943         if (NFSHASNFSV4N(nmp))
944                 return (0);             /* No confirmation for NFSv4.1. */
945         nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL,
946             0, 0, NULL);
947         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
948         *tl++ = op->nfso_stateid.seqid;
949         *tl++ = op->nfso_stateid.other[0];
950         *tl++ = op->nfso_stateid.other[1];
951         *tl++ = op->nfso_stateid.other[2];
952         *tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
953         error = nfscl_request(nd, vp, p, cred, NULL);
954         if (error)
955                 return (error);
956         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
957         if (!nd->nd_repstat) {
958                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
959                 op->nfso_stateid.seqid = *tl++;
960                 op->nfso_stateid.other[0] = *tl++;
961                 op->nfso_stateid.other[1] = *tl++;
962                 op->nfso_stateid.other[2] = *tl;
963         }
964         error = nd->nd_repstat;
965         if (error == NFSERR_STALESTATEID)
966                 nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
967 nfsmout:
968         m_freem(nd->nd_mrep);
969         return (error);
970 }
971
972 /*
973  * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
974  * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
975  */
976 int
977 nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
978     bool *retokp, struct ucred *cred, NFSPROC_T *p)
979 {
980         u_int32_t *tl;
981         struct nfsrv_descript nfsd;
982         struct nfsrv_descript *nd = &nfsd;
983         u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
984         u_short port;
985         int error, isinet6 = 0, callblen;
986         nfsquad_t confirm;
987         static u_int32_t rev = 0;
988         struct nfsclds *dsp, *odsp;
989         struct in6_addr a6;
990         struct nfsclsession *tsep;
991         struct rpc_reconupcall recon;
992         struct nfscl_reconarg *rcp;
993
994         if (nfsboottime.tv_sec == 0)
995                 NFSSETBOOTTIME(nfsboottime);
996         if (NFSHASNFSV4N(nmp)) {
997                 error = NFSERR_BADSESSION;
998                 odsp = dsp = NULL;
999                 if (retokp != NULL) {
1000                         NFSLOCKMNT(nmp);
1001                         odsp = TAILQ_FIRST(&nmp->nm_sess);
1002                         NFSUNLOCKMNT(nmp);
1003                 }
1004                 if (odsp != NULL) {
1005                         /*
1006                          * When a session already exists, first try a
1007                          * CreateSession with the extant ClientID.
1008                          */
1009                         dsp = malloc(sizeof(struct nfsclds) +
1010                             odsp->nfsclds_servownlen + 1, M_NFSCLDS,
1011                             M_WAITOK | M_ZERO);
1012                         dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
1013                         dsp->nfsclds_servownlen = odsp->nfsclds_servownlen;
1014                         dsp->nfsclds_sess.nfsess_clientid =
1015                             odsp->nfsclds_sess.nfsess_clientid;
1016                         dsp->nfsclds_sess.nfsess_sequenceid =
1017                             odsp->nfsclds_sess.nfsess_sequenceid + 1;
1018                         dsp->nfsclds_flags = odsp->nfsclds_flags;
1019                         if (dsp->nfsclds_servownlen > 0)
1020                                 memcpy(dsp->nfsclds_serverown,
1021                                     odsp->nfsclds_serverown,
1022                                     dsp->nfsclds_servownlen + 1);
1023                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
1024                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
1025                             NULL, MTX_DEF);
1026                         nfscl_initsessionslots(&dsp->nfsclds_sess);
1027                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
1028                             &nmp->nm_sockreq, NULL,
1029                             dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
1030                         NFSCL_DEBUG(1, "create session for extant "
1031                             "ClientID=%d\n", error);
1032                         if (error != 0) {
1033                                 nfscl_freenfsclds(dsp);
1034                                 dsp = NULL;
1035                                 /*
1036                                  * If *retokp is true, return any error other
1037                                  * than NFSERR_STALECLIENTID,
1038                                  * NFSERR_BADSESSION or NFSERR_STALEDONTRECOVER
1039                                  * so that nfscl_recover() will not loop.
1040                                  */
1041                                 if (*retokp)
1042                                         return (NFSERR_IO);
1043                         } else
1044                                 *retokp = true;
1045                 } else if (retokp != NULL && *retokp)
1046                         return (NFSERR_IO);
1047                 if (error != 0) {
1048                         /*
1049                          * Either there was no previous session or the
1050                          * CreateSession attempt failed, so...
1051                          * do an ExchangeID followed by the CreateSession.
1052                          */
1053                         clp->nfsc_rev = rev++;
1054                         error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, 0,
1055                             NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp,
1056                             cred, p);
1057                         NFSCL_DEBUG(1, "aft exch=%d\n", error);
1058                         if (error == 0)
1059                                 error = nfsrpc_createsession(nmp,
1060                                     &dsp->nfsclds_sess, &nmp->nm_sockreq, NULL,
1061                                     dsp->nfsclds_sess.nfsess_sequenceid, 1,
1062                                     cred, p);
1063                         NFSCL_DEBUG(1, "aft createsess=%d\n", error);
1064                 }
1065                 if (error == 0) {
1066                         /*
1067                          * If the session supports a backchannel, set up
1068                          * the BindConnectionToSession call in the krpc
1069                          * so that it is done on a reconnection.
1070                          */
1071                         if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0) {
1072                                 rcp = mem_alloc(sizeof(*rcp));
1073                                 rcp->minorvers = nmp->nm_minorvers;
1074                                 memcpy(rcp->sessionid,
1075                                     dsp->nfsclds_sess.nfsess_sessionid,
1076                                     NFSX_V4SESSIONID);
1077                                 recon.call = nfsrpc_bindconnsess;
1078                                 recon.arg = rcp;
1079                                 CLNT_CONTROL(nmp->nm_client, CLSET_RECONUPCALL,
1080                                     &recon);
1081                         }
1082
1083                         NFSLOCKMNT(nmp);
1084                         /*
1085                          * The old sessions cannot be safely free'd
1086                          * here, since they may still be used by
1087                          * in-progress RPCs.
1088                          */
1089                         tsep = NULL;
1090                         if (TAILQ_FIRST(&nmp->nm_sess) != NULL) {
1091                                 /*
1092                                  * Mark the old session defunct.  Needed
1093                                  * when called from nfscl_hasexpired().
1094                                  */
1095                                 tsep = NFSMNT_MDSSESSION(nmp);
1096                                 tsep->nfsess_defunct = 1;
1097                         }
1098                         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
1099                             nfsclds_list);
1100                         /*
1101                          * Wake up RPCs waiting for a slot on the
1102                          * old session. These will then fail with
1103                          * NFSERR_BADSESSION and be retried with the
1104                          * new session by nfsv4_setsequence().
1105                          * Also wakeup() processes waiting for the
1106                          * new session.
1107                          */
1108                         if (tsep != NULL)
1109                                 wakeup(&tsep->nfsess_slots);
1110                         wakeup(&nmp->nm_sess);
1111                         NFSUNLOCKMNT(nmp);
1112                 } else if (dsp != NULL)
1113                         nfscl_freenfsclds(dsp);
1114                 if (error == 0 && reclaim == 0) {
1115                         error = nfsrpc_reclaimcomplete(nmp, cred, p);
1116                         NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
1117                         if (error == NFSERR_COMPLETEALREADY ||
1118                             error == NFSERR_NOTSUPP)
1119                                 /* Ignore this error. */
1120                                 error = 0;
1121                 }
1122                 return (error);
1123         } else if (retokp != NULL && *retokp)
1124                 return (NFSERR_IO);
1125         clp->nfsc_rev = rev++;
1126
1127         /*
1128          * Allocate a single session structure for NFSv4.0, because some of
1129          * the fields are used by NFSv4.0 although it doesn't do a session.
1130          */
1131         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
1132         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
1133         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
1134         NFSLOCKMNT(nmp);
1135         TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
1136         tsep = NFSMNT_MDSSESSION(nmp);
1137         NFSUNLOCKMNT(nmp);
1138
1139         nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0,
1140             NULL);
1141         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1142         *tl++ = txdr_unsigned(nfsboottime.tv_sec);
1143         *tl = txdr_unsigned(clp->nfsc_rev);
1144         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
1145
1146         /*
1147          * set up the callback address
1148          */
1149         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1150         *tl = txdr_unsigned(NFS_CALLBCKPROG);
1151         callblen = strlen(nfsv4_callbackaddr);
1152         if (callblen == 0)
1153                 cp = nfscl_getmyip(nmp, &a6, &isinet6);
1154         if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
1155             (callblen > 0 || cp != NULL)) {
1156                 port = htons(nfsv4_cbport);
1157                 cp2 = (u_int8_t *)&port;
1158 #ifdef INET6
1159                 if ((callblen > 0 &&
1160                      strchr(nfsv4_callbackaddr, ':')) || isinet6) {
1161                         char ip6buf[INET6_ADDRSTRLEN], *ip6add;
1162
1163                         (void) nfsm_strtom(nd, "tcp6", 4);
1164                         if (callblen == 0) {
1165                                 ip6_sprintf(ip6buf, (struct in6_addr *)cp);
1166                                 ip6add = ip6buf;
1167                         } else {
1168                                 ip6add = nfsv4_callbackaddr;
1169                         }
1170                         snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
1171                             ip6add, cp2[0], cp2[1]);
1172                 } else
1173 #endif
1174                 {
1175                         (void) nfsm_strtom(nd, "tcp", 3);
1176                         if (callblen == 0)
1177                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1178                                     "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
1179                                     cp[2], cp[3], cp2[0], cp2[1]);
1180                         else
1181                                 snprintf(addr, INET6_ADDRSTRLEN + 9,
1182                                     "%s.%d.%d", nfsv4_callbackaddr,
1183                                     cp2[0], cp2[1]);
1184                 }
1185                 (void) nfsm_strtom(nd, addr, strlen(addr));
1186         } else {
1187                 (void) nfsm_strtom(nd, "tcp", 3);
1188                 (void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
1189         }
1190         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1191         *tl = txdr_unsigned(clp->nfsc_cbident);
1192         nd->nd_flag |= ND_USEGSSNAME;
1193         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1194                 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1195         if (error)
1196                 return (error);
1197         if (nd->nd_repstat == 0) {
1198             NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1199             tsep->nfsess_clientid.lval[0] = *tl++;
1200             tsep->nfsess_clientid.lval[1] = *tl++;
1201             confirm.lval[0] = *tl++;
1202             confirm.lval[1] = *tl;
1203             m_freem(nd->nd_mrep);
1204             nd->nd_mrep = NULL;
1205
1206             /*
1207              * and confirm it.
1208              */
1209             nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
1210                 NULL, 0, 0, NULL);
1211             NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1212             *tl++ = tsep->nfsess_clientid.lval[0];
1213             *tl++ = tsep->nfsess_clientid.lval[1];
1214             *tl++ = confirm.lval[0];
1215             *tl = confirm.lval[1];
1216             nd->nd_flag |= ND_USEGSSNAME;
1217             error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1218                 cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1219             if (error)
1220                 return (error);
1221             m_freem(nd->nd_mrep);
1222             nd->nd_mrep = NULL;
1223         }
1224         error = nd->nd_repstat;
1225 nfsmout:
1226         m_freem(nd->nd_mrep);
1227         return (error);
1228 }
1229
1230 /*
1231  * nfs getattr call.
1232  */
1233 int
1234 nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1235     struct nfsvattr *nap, void *stuff)
1236 {
1237         struct nfsrv_descript nfsd, *nd = &nfsd;
1238         int error;
1239         nfsattrbit_t attrbits;
1240         struct nfsnode *np;
1241         struct nfsmount *nmp;
1242
1243         nmp = VFSTONFS(vp->v_mount);
1244         np = VTONFS(vp);
1245         if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
1246             nmp->nm_fhsize == 0) {
1247                 /* Attempt to get the actual root file handle. */
1248                 error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, p);
1249                 if (error != 0)
1250                         return (EACCES);
1251                 if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
1252                         nfscl_statfs(vp, cred, p);
1253         }
1254         NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
1255         if (nd->nd_flag & ND_NFSV4) {
1256                 NFSGETATTR_ATTRBIT(&attrbits);
1257                 (void) nfsrv_putattrbit(nd, &attrbits);
1258         }
1259         error = nfscl_request(nd, vp, p, cred, stuff);
1260         if (error)
1261                 return (error);
1262         if (!nd->nd_repstat)
1263                 error = nfsm_loadattr(nd, nap);
1264         else
1265                 error = nd->nd_repstat;
1266         m_freem(nd->nd_mrep);
1267         return (error);
1268 }
1269
1270 /*
1271  * nfs getattr call with non-vnode arguments.
1272  */
1273 int
1274 nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1275     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1276     uint32_t *leasep)
1277 {
1278         struct nfsrv_descript nfsd, *nd = &nfsd;
1279         int error, vers = NFS_VER2;
1280         nfsattrbit_t attrbits;
1281
1282         nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0,
1283             cred);
1284         if (nd->nd_flag & ND_NFSV4) {
1285                 vers = NFS_VER4;
1286                 NFSGETATTR_ATTRBIT(&attrbits);
1287                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1288                 (void) nfsrv_putattrbit(nd, &attrbits);
1289         } else if (nd->nd_flag & ND_NFSV3) {
1290                 vers = NFS_VER3;
1291         }
1292         if (syscred)
1293                 nd->nd_flag |= ND_USEGSSNAME;
1294         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1295             NFS_PROG, vers, NULL, 1, xidp, NULL);
1296         if (error)
1297                 return (error);
1298         if (nd->nd_repstat == 0) {
1299                 if ((nd->nd_flag & ND_NFSV4) != 0)
1300                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1301                             NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1302                             NULL, NULL);
1303                 else
1304                         error = nfsm_loadattr(nd, nap);
1305         } else
1306                 error = nd->nd_repstat;
1307         m_freem(nd->nd_mrep);
1308         return (error);
1309 }
1310
1311 /*
1312  * Do an nfs setattr operation.
1313  */
1314 int
1315 nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1316     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
1317     void *stuff)
1318 {
1319         int error, expireret = 0, openerr, retrycnt;
1320         u_int32_t clidrev = 0, mode;
1321         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1322         struct nfsfh *nfhp;
1323         nfsv4stateid_t stateid;
1324         void *lckp;
1325
1326         if (nmp->nm_clp != NULL)
1327                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1328         if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1329                 mode = NFSV4OPEN_ACCESSWRITE;
1330         else
1331                 mode = NFSV4OPEN_ACCESSREAD;
1332         retrycnt = 0;
1333         do {
1334                 lckp = NULL;
1335                 openerr = 1;
1336                 if (NFSHASNFSV4(nmp)) {
1337                         nfhp = VTONFS(vp)->n_fhp;
1338                         error = nfscl_getstateid(vp, nfhp->nfh_fh,
1339                             nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1340                         if (error && vnode_vtype(vp) == VREG &&
1341                             (mode == NFSV4OPEN_ACCESSWRITE ||
1342                              nfstest_openallsetattr)) {
1343                                 /*
1344                                  * No Open stateid, so try and open the file
1345                                  * now.
1346                                  */
1347                                 if (mode == NFSV4OPEN_ACCESSWRITE)
1348                                         openerr = nfsrpc_open(vp, FWRITE, cred,
1349                                             p);
1350                                 else
1351                                         openerr = nfsrpc_open(vp, FREAD, cred,
1352                                             p);
1353                                 if (!openerr)
1354                                         (void) nfscl_getstateid(vp,
1355                                             nfhp->nfh_fh, nfhp->nfh_len,
1356                                             mode, 0, cred, p, &stateid, &lckp);
1357                         }
1358                 }
1359                 if (vap != NULL)
1360                         error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1361                             rnap, attrflagp, stuff);
1362                 else
1363                         error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
1364                             stuff);
1365                 if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1366                         NFSLOCKMNT(nmp);
1367                         nmp->nm_state |= NFSSTA_OPENMODE;
1368                         NFSUNLOCKMNT(nmp);
1369                 }
1370                 if (error == NFSERR_STALESTATEID)
1371                         nfscl_initiate_recovery(nmp->nm_clp);
1372                 if (lckp != NULL)
1373                         nfscl_lockderef(lckp);
1374                 if (!openerr)
1375                         (void) nfsrpc_close(vp, 0, p);
1376                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1377                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1378                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1379                         (void) nfs_catnap(PZERO, error, "nfs_setattr");
1380                 } else if ((error == NFSERR_EXPIRED ||
1381                     ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1382                     error == NFSERR_BADSTATEID)) && clidrev != 0) {
1383                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1384                 } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1385                     NFSHASNFSV4N(nmp)) {
1386                         error = EIO;
1387                 }
1388                 retrycnt++;
1389         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1390             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1391             error == NFSERR_BADSESSION ||
1392             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1393             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1394              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1395             (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1396              retrycnt < 4));
1397         if (error && retrycnt >= 4)
1398                 error = EIO;
1399         return (error);
1400 }
1401
1402 static int
1403 nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1404     nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1405     struct nfsvattr *rnap, int *attrflagp, void *stuff)
1406 {
1407         u_int32_t *tl;
1408         struct nfsrv_descript nfsd, *nd = &nfsd;
1409         int error;
1410         nfsattrbit_t attrbits;
1411
1412         *attrflagp = 0;
1413         NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp, cred);
1414         if (nd->nd_flag & ND_NFSV4)
1415                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1416         vap->va_type = vnode_vtype(vp);
1417         nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1418         if (nd->nd_flag & ND_NFSV3) {
1419                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1420                 *tl = newnfs_false;
1421         } else if (nd->nd_flag & ND_NFSV4) {
1422                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1423                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1424                 NFSGETATTR_ATTRBIT(&attrbits);
1425                 (void) nfsrv_putattrbit(nd, &attrbits);
1426         }
1427         error = nfscl_request(nd, vp, p, cred, stuff);
1428         if (error)
1429                 return (error);
1430         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1431                 error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, NULL);
1432         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1433                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1434         if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1435                 error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
1436         m_freem(nd->nd_mrep);
1437         if (nd->nd_repstat && !error)
1438                 error = nd->nd_repstat;
1439         return (error);
1440 }
1441
1442 /*
1443  * nfs lookup rpc
1444  */
1445 int
1446 nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
1447     NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
1448     struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff)
1449 {
1450         u_int32_t *tl;
1451         struct nfsrv_descript nfsd, *nd = &nfsd;
1452         struct nfsmount *nmp;
1453         struct nfsnode *np;
1454         struct nfsfh *nfhp;
1455         nfsattrbit_t attrbits;
1456         int error = 0, lookupp = 0;
1457
1458         *attrflagp = 0;
1459         *dattrflagp = 0;
1460         if (vnode_vtype(dvp) != VDIR)
1461                 return (ENOTDIR);
1462         nmp = VFSTONFS(dvp->v_mount);
1463         if (len > NFS_MAXNAMLEN)
1464                 return (ENAMETOOLONG);
1465         if (NFSHASNFSV4(nmp) && len == 1 &&
1466                 name[0] == '.') {
1467                 /*
1468                  * Just return the current dir's fh.
1469                  */
1470                 np = VTONFS(dvp);
1471                 nfhp = malloc(sizeof (struct nfsfh) +
1472                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1473                 nfhp->nfh_len = np->n_fhp->nfh_len;
1474                 NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1475                 *nfhpp = nfhp;
1476                 return (0);
1477         }
1478         if (NFSHASNFSV4(nmp) && len == 2 &&
1479                 name[0] == '.' && name[1] == '.') {
1480                 lookupp = 1;
1481                 NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp, cred);
1482         } else {
1483                 NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp, cred);
1484                 (void) nfsm_strtom(nd, name, len);
1485         }
1486         if (nd->nd_flag & ND_NFSV4) {
1487                 NFSGETATTR_ATTRBIT(&attrbits);
1488                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1489                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
1490                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1491                 (void) nfsrv_putattrbit(nd, &attrbits);
1492         }
1493         error = nfscl_request(nd, dvp, p, cred, stuff);
1494         if (error)
1495                 return (error);
1496         if (nd->nd_repstat) {
1497                 /*
1498                  * When an NFSv4 Lookupp returns ENOENT, it means that
1499                  * the lookup is at the root of an fs, so return this dir.
1500                  */
1501                 if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
1502                     np = VTONFS(dvp);
1503                     nfhp = malloc(sizeof (struct nfsfh) +
1504                         np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1505                     nfhp->nfh_len = np->n_fhp->nfh_len;
1506                     NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1507                     *nfhpp = nfhp;
1508                     m_freem(nd->nd_mrep);
1509                     return (0);
1510                 }
1511                 if (nd->nd_flag & ND_NFSV3)
1512                     error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1513                 else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1514                     ND_NFSV4) {
1515                         /* Load the directory attributes. */
1516                         error = nfsm_loadattr(nd, dnap);
1517                         if (error == 0)
1518                                 *dattrflagp = 1;
1519                 }
1520                 goto nfsmout;
1521         }
1522         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
1523                 /* Load the directory attributes. */
1524                 error = nfsm_loadattr(nd, dnap);
1525                 if (error != 0)
1526                         goto nfsmout;
1527                 *dattrflagp = 1;
1528                 /* Skip over the Lookup and GetFH operation status values. */
1529                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1530         }
1531         error = nfsm_getfh(nd, nfhpp);
1532         if (error)
1533                 goto nfsmout;
1534
1535         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1536         if ((nd->nd_flag & ND_NFSV3) && !error)
1537                 error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1538 nfsmout:
1539         m_freem(nd->nd_mrep);
1540         if (!error && nd->nd_repstat)
1541                 error = nd->nd_repstat;
1542         return (error);
1543 }
1544
1545 /*
1546  * Do a readlink rpc.
1547  */
1548 int
1549 nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1550     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1551 {
1552         u_int32_t *tl;
1553         struct nfsrv_descript nfsd, *nd = &nfsd;
1554         struct nfsnode *np = VTONFS(vp);
1555         nfsattrbit_t attrbits;
1556         int error, len, cangetattr = 1;
1557
1558         *attrflagp = 0;
1559         NFSCL_REQSTART(nd, NFSPROC_READLINK, vp, cred);
1560         if (nd->nd_flag & ND_NFSV4) {
1561                 /*
1562                  * And do a Getattr op.
1563                  */
1564                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1565                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
1566                 NFSGETATTR_ATTRBIT(&attrbits);
1567                 (void) nfsrv_putattrbit(nd, &attrbits);
1568         }
1569         error = nfscl_request(nd, vp, p, cred, stuff);
1570         if (error)
1571                 return (error);
1572         if (nd->nd_flag & ND_NFSV3)
1573                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1574         if (!nd->nd_repstat && !error) {
1575                 NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1576                 /*
1577                  * This seems weird to me, but must have been added to
1578                  * FreeBSD for some reason. The only thing I can think of
1579                  * is that there was/is some server that replies with
1580                  * more link data than it should?
1581                  */
1582                 if (len == NFS_MAXPATHLEN) {
1583                         NFSLOCKNODE(np);
1584                         if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1585                                 len = np->n_size;
1586                                 cangetattr = 0;
1587                         }
1588                         NFSUNLOCKNODE(np);
1589                 }
1590                 error = nfsm_mbufuio(nd, uiop, len);
1591                 if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1592                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1593         }
1594         if (nd->nd_repstat && !error)
1595                 error = nd->nd_repstat;
1596 nfsmout:
1597         m_freem(nd->nd_mrep);
1598         return (error);
1599 }
1600
1601 /*
1602  * Read operation.
1603  */
1604 int
1605 nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1606     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1607 {
1608         int error, expireret = 0, retrycnt;
1609         u_int32_t clidrev = 0;
1610         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1611         struct nfsnode *np = VTONFS(vp);
1612         struct ucred *newcred;
1613         struct nfsfh *nfhp = NULL;
1614         nfsv4stateid_t stateid;
1615         void *lckp;
1616
1617         if (nmp->nm_clp != NULL)
1618                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1619         newcred = cred;
1620         if (NFSHASNFSV4(nmp)) {
1621                 nfhp = np->n_fhp;
1622                 newcred = NFSNEWCRED(cred);
1623         }
1624         retrycnt = 0;
1625         do {
1626                 lckp = NULL;
1627                 if (NFSHASNFSV4(nmp))
1628                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1629                             NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1630                             &lckp);
1631                 error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1632                     attrflagp, stuff);
1633                 if (error == NFSERR_OPENMODE) {
1634                         NFSLOCKMNT(nmp);
1635                         nmp->nm_state |= NFSSTA_OPENMODE;
1636                         NFSUNLOCKMNT(nmp);
1637                 }
1638                 if (error == NFSERR_STALESTATEID)
1639                         nfscl_initiate_recovery(nmp->nm_clp);
1640                 if (lckp != NULL)
1641                         nfscl_lockderef(lckp);
1642                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1643                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1644                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1645                         (void) nfs_catnap(PZERO, error, "nfs_read");
1646                 } else if ((error == NFSERR_EXPIRED ||
1647                     ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1648                     error == NFSERR_BADSTATEID)) && clidrev != 0) {
1649                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1650                 } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1651                     NFSHASNFSV4N(nmp)) {
1652                         error = EIO;
1653                 }
1654                 retrycnt++;
1655         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1656             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1657             error == NFSERR_BADSESSION ||
1658             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1659             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1660              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1661             (error == NFSERR_OPENMODE && retrycnt < 4));
1662         if (error && retrycnt >= 4)
1663                 error = EIO;
1664         if (NFSHASNFSV4(nmp))
1665                 NFSFREECRED(newcred);
1666         return (error);
1667 }
1668
1669 /*
1670  * The actual read RPC.
1671  */
1672 static int
1673 nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1674     nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1675     int *attrflagp, void *stuff)
1676 {
1677         u_int32_t *tl;
1678         int error = 0, len, retlen, tsiz, eof = 0;
1679         struct nfsrv_descript nfsd;
1680         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1681         struct nfsrv_descript *nd = &nfsd;
1682         int rsize;
1683         off_t tmp_off;
1684
1685         *attrflagp = 0;
1686         tsiz = uiop->uio_resid;
1687         tmp_off = uiop->uio_offset + tsiz;
1688         NFSLOCKMNT(nmp);
1689         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1690                 NFSUNLOCKMNT(nmp);
1691                 return (EFBIG);
1692         }
1693         rsize = nmp->nm_rsize;
1694         NFSUNLOCKMNT(nmp);
1695         nd->nd_mrep = NULL;
1696         while (tsiz > 0) {
1697                 *attrflagp = 0;
1698                 len = (tsiz > rsize) ? rsize : tsiz;
1699                 NFSCL_REQSTART(nd, NFSPROC_READ, vp, cred);
1700                 if (nd->nd_flag & ND_NFSV4)
1701                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1702                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1703                 if (nd->nd_flag & ND_NFSV2) {
1704                         *tl++ = txdr_unsigned(uiop->uio_offset);
1705                         *tl++ = txdr_unsigned(len);
1706                         *tl = 0;
1707                 } else {
1708                         txdr_hyper(uiop->uio_offset, tl);
1709                         *(tl + 2) = txdr_unsigned(len);
1710                 }
1711                 /*
1712                  * Since I can't do a Getattr for NFSv4 for Write, there
1713                  * doesn't seem any point in doing one here, either.
1714                  * (See the comment in nfsrpc_writerpc() for more info.)
1715                  */
1716                 error = nfscl_request(nd, vp, p, cred, stuff);
1717                 if (error)
1718                         return (error);
1719                 if (nd->nd_flag & ND_NFSV3) {
1720                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1721                 } else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1722                         error = nfsm_loadattr(nd, nap);
1723                         if (!error)
1724                                 *attrflagp = 1;
1725                 }
1726                 if (nd->nd_repstat || error) {
1727                         if (!error)
1728                                 error = nd->nd_repstat;
1729                         goto nfsmout;
1730                 }
1731                 if (nd->nd_flag & ND_NFSV3) {
1732                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1733                         eof = fxdr_unsigned(int, *(tl + 1));
1734                 } else if (nd->nd_flag & ND_NFSV4) {
1735                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1736                         eof = fxdr_unsigned(int, *tl);
1737                 }
1738                 NFSM_STRSIZ(retlen, len);
1739                 error = nfsm_mbufuio(nd, uiop, retlen);
1740                 if (error)
1741                         goto nfsmout;
1742                 m_freem(nd->nd_mrep);
1743                 nd->nd_mrep = NULL;
1744                 tsiz -= retlen;
1745                 if (!(nd->nd_flag & ND_NFSV2)) {
1746                         if (eof || retlen == 0)
1747                                 tsiz = 0;
1748                 } else if (retlen < len)
1749                         tsiz = 0;
1750         }
1751         return (0);
1752 nfsmout:
1753         if (nd->nd_mrep != NULL)
1754                 m_freem(nd->nd_mrep);
1755         return (error);
1756 }
1757
1758 /*
1759  * nfs write operation
1760  * When called_from_strategy != 0, it should return EIO for an error that
1761  * indicates recovery is in progress, so that the buffer will be left
1762  * dirty and be written back to the server later. If it loops around,
1763  * the recovery thread could get stuck waiting for the buffer and recovery
1764  * will then deadlock.
1765  */
1766 int
1767 nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1768     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1769     int called_from_strategy, int ioflag)
1770 {
1771         int error, expireret = 0, retrycnt, nostateid;
1772         u_int32_t clidrev = 0;
1773         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1774         struct nfsnode *np = VTONFS(vp);
1775         struct ucred *newcred;
1776         struct nfsfh *nfhp = NULL;
1777         nfsv4stateid_t stateid;
1778         void *lckp;
1779
1780         KASSERT(*must_commit >= 0 && *must_commit <= 2,
1781             ("nfsrpc_write: must_commit out of range=%d", *must_commit));
1782         if (nmp->nm_clp != NULL)
1783                 clidrev = nmp->nm_clp->nfsc_clientidrev;
1784         newcred = cred;
1785         if (NFSHASNFSV4(nmp)) {
1786                 newcred = NFSNEWCRED(cred);
1787                 nfhp = np->n_fhp;
1788         }
1789         retrycnt = 0;
1790         do {
1791                 lckp = NULL;
1792                 nostateid = 0;
1793                 if (NFSHASNFSV4(nmp)) {
1794                         (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1795                             NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1796                             &lckp);
1797                         if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1798                             stateid.other[2] == 0) {
1799                                 nostateid = 1;
1800                                 NFSCL_DEBUG(1, "stateid0 in write\n");
1801                         }
1802                 }
1803
1804                 /*
1805                  * If there is no stateid for NFSv4, it means this is an
1806                  * extraneous write after close. Basically a poorly
1807                  * implemented buffer cache. Just don't do the write.
1808                  */
1809                 if (nostateid)
1810                         error = 0;
1811                 else
1812                         error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1813                             newcred, &stateid, p, nap, attrflagp, ioflag);
1814                 if (error == NFSERR_STALESTATEID)
1815                         nfscl_initiate_recovery(nmp->nm_clp);
1816                 if (lckp != NULL)
1817                         nfscl_lockderef(lckp);
1818                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1819                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1820                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1821                         (void) nfs_catnap(PZERO, error, "nfs_write");
1822                 } else if ((error == NFSERR_EXPIRED ||
1823                     ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1824                     error == NFSERR_BADSTATEID)) && clidrev != 0) {
1825                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1826                 } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1827                     NFSHASNFSV4N(nmp)) {
1828                         error = EIO;
1829                 }
1830                 retrycnt++;
1831         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
1832             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1833               error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
1834             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1835             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1836              expireret == 0 && clidrev != 0 && retrycnt < 4));
1837         if (error != 0 && (retrycnt >= 4 ||
1838             ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1839               error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
1840                 error = EIO;
1841         if (NFSHASNFSV4(nmp))
1842                 NFSFREECRED(newcred);
1843         return (error);
1844 }
1845
1846 /*
1847  * The actual write RPC.
1848  */
1849 static int
1850 nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
1851     int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
1852     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int ioflag)
1853 {
1854         u_int32_t *tl;
1855         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1856         struct nfsnode *np = VTONFS(vp);
1857         int error = 0, len, rlen, commit, committed = NFSWRITE_FILESYNC;
1858         int wccflag = 0;
1859         int32_t backup;
1860         struct nfsrv_descript *nd;
1861         nfsattrbit_t attrbits;
1862         uint64_t tmp_off;
1863         ssize_t tsiz, wsize;
1864         bool do_append;
1865
1866         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
1867         *attrflagp = 0;
1868         tsiz = uiop->uio_resid;
1869         tmp_off = uiop->uio_offset + tsiz;
1870         NFSLOCKMNT(nmp);
1871         if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1872                 NFSUNLOCKMNT(nmp);
1873                 return (EFBIG);
1874         }
1875         wsize = nmp->nm_wsize;
1876         do_append = false;
1877         if ((ioflag & IO_APPEND) != 0 && NFSHASNFSV4(nmp) && !NFSHASPNFS(nmp))
1878                 do_append = true;
1879         NFSUNLOCKMNT(nmp);
1880         nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK);
1881         nd->nd_mrep = NULL;     /* NFSv2 sometimes does a write with */
1882         nd->nd_repstat = 0;     /* uio_resid == 0, so the while is not done */
1883         while (tsiz > 0) {
1884                 *attrflagp = 0;
1885                 len = (tsiz > wsize) ? wsize : tsiz;
1886                 if (do_append)
1887                         NFSCL_REQSTART(nd, NFSPROC_APPENDWRITE, vp, cred);
1888                 else
1889                         NFSCL_REQSTART(nd, NFSPROC_WRITE, vp, cred);
1890                 if (nd->nd_flag & ND_NFSV4) {
1891                         if (do_append) {
1892                                 NFSZERO_ATTRBIT(&attrbits);
1893                                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
1894                                 nfsrv_putattrbit(nd, &attrbits);
1895                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED +
1896                                     NFSX_HYPER);
1897                                 *tl++ = txdr_unsigned(NFSX_HYPER);
1898                                 txdr_hyper(uiop->uio_offset, tl); tl += 2;
1899                                 *tl = txdr_unsigned(NFSV4OP_WRITE);
1900                         }
1901                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1902                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
1903                         txdr_hyper(uiop->uio_offset, tl);
1904                         tl += 2;
1905                         *tl++ = txdr_unsigned(*iomode);
1906                         *tl = txdr_unsigned(len);
1907                 } else if (nd->nd_flag & ND_NFSV3) {
1908                         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
1909                         txdr_hyper(uiop->uio_offset, tl);
1910                         tl += 2;
1911                         *tl++ = txdr_unsigned(len);
1912                         *tl++ = txdr_unsigned(*iomode);
1913                         *tl = txdr_unsigned(len);
1914                 } else {
1915                         u_int32_t x;
1916
1917                         NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1918                         /*
1919                          * Not sure why someone changed this, since the
1920                          * RFC clearly states that "beginoffset" and
1921                          * "totalcount" are ignored, but it wouldn't
1922                          * surprise me if there's a busted server out there.
1923                          */
1924                         /* Set both "begin" and "current" to non-garbage. */
1925                         x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1926                         *tl++ = x;      /* "begin offset" */
1927                         *tl++ = x;      /* "current offset" */
1928                         x = txdr_unsigned(len);
1929                         *tl++ = x;      /* total to this offset */
1930                         *tl = x;        /* size of this write */
1931                 }
1932                 error = nfsm_uiombuf(nd, uiop, len);
1933                 if (error != 0) {
1934                         m_freem(nd->nd_mreq);
1935                         free(nd, M_TEMP);
1936                         return (error);
1937                 }
1938                 /*
1939                  * Although it is tempting to do a normal Getattr Op in the
1940                  * NFSv4 compound, the result can be a nearly hung client
1941                  * system if the Getattr asks for Owner and/or OwnerGroup.
1942                  * It occurs when the client can't map either the Owner or
1943                  * Owner_group name in the Getattr reply to a uid/gid. When
1944                  * there is a cache miss, the kernel does an upcall to the
1945                  * nfsuserd. Then, it can try and read the local /etc/passwd
1946                  * or /etc/group file. It can then block in getnewbuf(),
1947                  * waiting for dirty writes to be pushed to the NFS server.
1948                  * The only reason this doesn't result in a complete
1949                  * deadlock, is that the upcall times out and allows
1950                  * the write to complete. However, progress is so slow
1951                  * that it might just as well be deadlocked.
1952                  * As such, we get the rest of the attributes, but not
1953                  * Owner or Owner_group.
1954                  * nb: nfscl_loadattrcache() needs to be told that these
1955                  *     partial attributes from a write rpc are being
1956                  *     passed in, via a argument flag.
1957                  */
1958                 if (nd->nd_flag & ND_NFSV4) {
1959                         NFSWRITEGETATTR_ATTRBIT(&attrbits);
1960                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1961                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
1962                         (void) nfsrv_putattrbit(nd, &attrbits);
1963                 }
1964                 error = nfscl_request(nd, vp, p, cred, NULL);
1965                 if (error) {
1966                         free(nd, M_TEMP);
1967                         return (error);
1968                 }
1969                 if (nd->nd_repstat) {
1970                         /*
1971                          * In case the rpc gets retried, roll
1972                          * the uio fields changed by nfsm_uiombuf()
1973                          * back.
1974                          */
1975                         uiop->uio_offset -= len;
1976                         uiop->uio_resid += len;
1977                         uiop->uio_iov->iov_base =
1978                             (char *)uiop->uio_iov->iov_base - len;
1979                         uiop->uio_iov->iov_len += len;
1980                 }
1981                 if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1982                         error = nfscl_wcc_data(nd, vp, nap, attrflagp,
1983                             &wccflag, &tmp_off);
1984                         if (error)
1985                                 goto nfsmout;
1986                 }
1987                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1988                     (ND_NFSV4 | ND_NOMOREDATA) &&
1989                     nd->nd_repstat == NFSERR_NOTSAME && do_append) {
1990                         /*
1991                          * Verify of the file's size failed, so redo the
1992                          * write using the file's size as returned in
1993                          * the wcc attributes.
1994                          */
1995                         if (tmp_off + tsiz <= nmp->nm_maxfilesize) {
1996                                 do_append = false;
1997                                 uiop->uio_offset = tmp_off;
1998                                 m_freem(nd->nd_mrep);
1999                                 nd->nd_mrep = NULL;
2000                                 continue;
2001                         } else
2002                                 nd->nd_repstat = EFBIG;
2003                 }
2004                 if (!nd->nd_repstat) {
2005                         if (do_append) {
2006                                 /* Strip off the Write reply status. */
2007                                 do_append = false;
2008                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
2009                         }
2010                         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2011                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
2012                                         + NFSX_VERF);
2013                                 rlen = fxdr_unsigned(int, *tl++);
2014                                 if (rlen == 0) {
2015                                         error = NFSERR_IO;
2016                                         goto nfsmout;
2017                                 } else if (rlen < len) {
2018                                         backup = len - rlen;
2019                                         uiop->uio_iov->iov_base =
2020                                             (char *)uiop->uio_iov->iov_base -
2021                                             backup;
2022                                         uiop->uio_iov->iov_len += backup;
2023                                         uiop->uio_offset -= backup;
2024                                         uiop->uio_resid += backup;
2025                                         len = rlen;
2026                                 }
2027                                 commit = fxdr_unsigned(int, *tl++);
2028
2029                                 /*
2030                                  * Return the lowest commitment level
2031                                  * obtained by any of the RPCs.
2032                                  */
2033                                 if (committed == NFSWRITE_FILESYNC)
2034                                         committed = commit;
2035                                 else if (committed == NFSWRITE_DATASYNC &&
2036                                         commit == NFSWRITE_UNSTABLE)
2037                                         committed = commit;
2038                                 NFSLOCKMNT(nmp);
2039                                 if (!NFSHASWRITEVERF(nmp)) {
2040                                         NFSBCOPY((caddr_t)tl,
2041                                             (caddr_t)&nmp->nm_verf[0],
2042                                             NFSX_VERF);
2043                                         NFSSETWRITEVERF(nmp);
2044                                 } else if (NFSBCMP(tl, nmp->nm_verf,
2045                                     NFSX_VERF) && *must_commit != 2) {
2046                                         *must_commit = 1;
2047                                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
2048                                 }
2049                                 NFSUNLOCKMNT(nmp);
2050                         }
2051                         if (nd->nd_flag & ND_NFSV4)
2052                                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2053                         if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
2054                                 error = nfsm_loadattr(nd, nap);
2055                                 if (!error)
2056                                         *attrflagp = NFS_LATTR_NOSHRINK;
2057                         }
2058                 } else {
2059                         error = nd->nd_repstat;
2060                 }
2061                 if (error)
2062                         goto nfsmout;
2063                 NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
2064                 m_freem(nd->nd_mrep);
2065                 nd->nd_mrep = NULL;
2066                 tsiz -= len;
2067         }
2068 nfsmout:
2069         if (nd->nd_mrep != NULL)
2070                 m_freem(nd->nd_mrep);
2071         *iomode = committed;
2072         if (nd->nd_repstat && !error)
2073                 error = nd->nd_repstat;
2074         free(nd, M_TEMP);
2075         return (error);
2076 }
2077
2078 /*
2079  * nfs mknod rpc
2080  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
2081  * mode set to specify the file type and the size field for rdev.
2082  */
2083 int
2084 nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2085     u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
2086     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2087     int *attrflagp, int *dattrflagp, void *dstuff)
2088 {
2089         u_int32_t *tl;
2090         int error = 0;
2091         struct nfsrv_descript nfsd, *nd = &nfsd;
2092         nfsattrbit_t attrbits;
2093
2094         *nfhpp = NULL;
2095         *attrflagp = 0;
2096         *dattrflagp = 0;
2097         if (namelen > NFS_MAXNAMLEN)
2098                 return (ENAMETOOLONG);
2099         NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp, cred);
2100         if (nd->nd_flag & ND_NFSV4) {
2101                 if (vtyp == VBLK || vtyp == VCHR) {
2102                         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2103                         *tl++ = vtonfsv34_type(vtyp);
2104                         *tl++ = txdr_unsigned(NFSMAJOR(rdev));
2105                         *tl = txdr_unsigned(NFSMINOR(rdev));
2106                 } else {
2107                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2108                         *tl = vtonfsv34_type(vtyp);
2109                 }
2110         }
2111         (void) nfsm_strtom(nd, name, namelen);
2112         if (nd->nd_flag & ND_NFSV3) {
2113                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2114                 *tl = vtonfsv34_type(vtyp);
2115         }
2116         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2117                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2118         if ((nd->nd_flag & ND_NFSV3) &&
2119             (vtyp == VCHR || vtyp == VBLK)) {
2120                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2121                 *tl++ = txdr_unsigned(NFSMAJOR(rdev));
2122                 *tl = txdr_unsigned(NFSMINOR(rdev));
2123         }
2124         if (nd->nd_flag & ND_NFSV4) {
2125                 NFSGETATTR_ATTRBIT(&attrbits);
2126                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2127                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2128                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2129                 (void) nfsrv_putattrbit(nd, &attrbits);
2130         }
2131         if (nd->nd_flag & ND_NFSV2)
2132                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
2133         error = nfscl_request(nd, dvp, p, cred, dstuff);
2134         if (error)
2135                 return (error);
2136         if (nd->nd_flag & ND_NFSV4)
2137                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2138         if (!nd->nd_repstat) {
2139                 if (nd->nd_flag & ND_NFSV4) {
2140                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2141                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2142                         if (error)
2143                                 goto nfsmout;
2144                 }
2145                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2146                 if (error)
2147                         goto nfsmout;
2148         }
2149         if (nd->nd_flag & ND_NFSV3)
2150                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2151         if (!error && nd->nd_repstat)
2152                 error = nd->nd_repstat;
2153 nfsmout:
2154         m_freem(nd->nd_mrep);
2155         return (error);
2156 }
2157
2158 /*
2159  * nfs file create call
2160  * Mostly just call the approriate routine. (I separated out v4, so that
2161  * error recovery wouldn't be as difficult.)
2162  */
2163 int
2164 nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2165     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2166     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2167     int *attrflagp, int *dattrflagp, void *dstuff)
2168 {
2169         int error = 0, newone, expireret = 0, retrycnt, unlocked;
2170         struct nfsclowner *owp;
2171         struct nfscldeleg *dp;
2172         struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2173         u_int32_t clidrev;
2174
2175         if (NFSHASNFSV4(nmp)) {
2176             retrycnt = 0;
2177             do {
2178                 dp = NULL;
2179                 error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
2180                     NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
2181                     NULL, 1, true);
2182                 if (error)
2183                         return (error);
2184                 if (nmp->nm_clp != NULL)
2185                         clidrev = nmp->nm_clp->nfsc_clientidrev;
2186                 else
2187                         clidrev = 0;
2188                 if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
2189                     nfs_numnfscbd == 0 || retrycnt > 0)
2190                         error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
2191                           fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2192                           attrflagp, dattrflagp, dstuff, &unlocked);
2193                 else
2194                         error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
2195                           cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2196                           attrflagp, dattrflagp, dstuff, &unlocked);
2197                 /*
2198                  * There is no need to invalidate cached attributes here,
2199                  * since new post-delegation issue attributes are always
2200                  * returned by nfsrpc_createv4() and these will update the
2201                  * attribute cache.
2202                  */
2203                 if (dp != NULL)
2204                         (void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
2205                             (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
2206                 nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
2207                 if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2208                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2209                     error == NFSERR_BADSESSION) {
2210                         (void) nfs_catnap(PZERO, error, "nfs_open");
2211                 } else if ((error == NFSERR_EXPIRED ||
2212                     error == NFSERR_BADSTATEID) && clidrev != 0) {
2213                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2214                         retrycnt++;
2215                 }
2216             } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2217                 error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2218                 error == NFSERR_BADSESSION ||
2219                 ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2220                  expireret == 0 && clidrev != 0 && retrycnt < 4));
2221             if (error && retrycnt >= 4)
2222                     error = EIO;
2223         } else {
2224                 error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
2225                     fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
2226                     dstuff);
2227         }
2228         return (error);
2229 }
2230
2231 /*
2232  * The create rpc for v2 and 3.
2233  */
2234 static int
2235 nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2236     nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2237     struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2238     int *attrflagp, int *dattrflagp, void *dstuff)
2239 {
2240         u_int32_t *tl;
2241         int error = 0;
2242         struct nfsrv_descript nfsd, *nd = &nfsd;
2243
2244         *nfhpp = NULL;
2245         *attrflagp = 0;
2246         *dattrflagp = 0;
2247         if (namelen > NFS_MAXNAMLEN)
2248                 return (ENAMETOOLONG);
2249         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp, cred);
2250         (void) nfsm_strtom(nd, name, namelen);
2251         if (nd->nd_flag & ND_NFSV3) {
2252                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2253                 if (fmode & O_EXCL) {
2254                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2255                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2256                         *tl++ = cverf.lval[0];
2257                         *tl = cverf.lval[1];
2258                 } else {
2259                         *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2260                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
2261                 }
2262         } else {
2263                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2264         }
2265         error = nfscl_request(nd, dvp, p, cred, dstuff);
2266         if (error)
2267                 return (error);
2268         if (nd->nd_repstat == 0) {
2269                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2270                 if (error)
2271                         goto nfsmout;
2272         }
2273         if (nd->nd_flag & ND_NFSV3)
2274                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2275         if (nd->nd_repstat != 0 && error == 0)
2276                 error = nd->nd_repstat;
2277 nfsmout:
2278         m_freem(nd->nd_mrep);
2279         return (error);
2280 }
2281
     /*
      * The create rpc for NFSv4, done as an Open operation with open type
      * NFSV4OPEN_CREATE and claim type NFSV4OPEN_CLAIMNULL on "name" in dvp.
      *
      * The request also carries GetFH and GetAttr for the new file followed
      * by PutFH and GetAttr for the directory.  From a successful reply the
      * open stateid, the result flags, an optional delegation, the new file
      * handle (*nfhpp) with its attributes (nnap, *attrflagp) and the
      * directory attributes (dnap, *dattrflagp) are extracted.  An nfsclopen
      * is then obtained through nfscl_open(), given the stateid and, when the
      * result flags contain NFSV4OPEN_RESULTCONFIRM, confirmed with
      * nfsrpc_openconfirm().
      *
      * *unlockedp is set to 1 once nfscl_openrelease() has been called and is
      * 0 otherwise.  *dpp receives the delegation only when 0 is returned; on
      * error any delegation structure allocated here is freed.
      *
      * Returns 0, ENAMETOOLONG, the error from nfscl_request(), an error found
      * while parsing the reply or completing the open, or the server's
      * nd_repstat.
      */
2282 static int
2283 nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2284     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2285     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2286     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2287     int *dattrflagp, void *dstuff, int *unlockedp)
2288 {
2289         u_int32_t *tl;
2290         int error = 0, deleg, newone, ret, acesize, limitby;
2291         struct nfsrv_descript nfsd, *nd = &nfsd;
2292         struct nfsclopen *op;
2293         struct nfscldeleg *dp = NULL;
2294         struct nfsnode *np;
2295         struct nfsfh *nfhp;
2296         nfsattrbit_t attrbits;
2297         nfsv4stateid_t stateid;
2298         u_int32_t rflags;
2299         struct nfsmount *nmp;
2300         struct nfsclsession *tsep;
2301
2302         nmp = VFSTONFS(dvp->v_mount);
2303         np = VTONFS(dvp);
             /* Reset all outputs before anything can fail. */
2304         *unlockedp = 0;
2305         *nfhpp = NULL;
2306         *dpp = NULL;
2307         *attrflagp = 0;
2308         *dattrflagp = 0;
2309         if (namelen > NFS_MAXNAMLEN)
2310                 return (ENAMETOOLONG);
2311         NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp, cred);
2312         /*
2313          * For V4, this is actually an Open op.
2314          */
             /* Five words: seqid, share access, share deny, clientid (2). */
2315         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2316         *tl++ = txdr_unsigned(owp->nfsow_seqid);
2317         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2318             NFSV4OPEN_ACCESSREAD);
2319         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2320         tsep = nfsmnt_mdssession(nmp);
2321         *tl++ = tsep->nfsess_clientid.lval[0];
2322         *tl = tsep->nfsess_clientid.lval[1];
2323         (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
             /* Open type followed by the create mode chosen below. */
2324         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2325         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2326         if (fmode & O_EXCL) {
2327                 if (NFSHASNFSV4N(nmp)) {
2328                         if (NFSHASSESSPERSIST(nmp)) {
2329                                 /* Use GUARDED for persistent sessions. */
2330                                 *tl = txdr_unsigned(NFSCREATE_GUARDED);
2331                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2332                         } else {
2333                                 /* Otherwise, use EXCLUSIVE4_1. */
                                     /* Verifier first, then the attributes. */
2334                                 *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2335                                 NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2336                                 *tl++ = cverf.lval[0];
2337                                 *tl = cverf.lval[1];
2338                                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2339                         }
2340                 } else {
2341                         /* NFSv4.0 */
                             /* Only the verifier is sent, no attributes. */
2342                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2343                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2344                         *tl++ = cverf.lval[0];
2345                         *tl = cverf.lval[1];
2346                 }
2347         } else {
2348                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2349                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
2350         }
2351         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2352         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2353         (void) nfsm_strtom(nd, name, namelen);
2354         /* Get the new file's handle and attributes. */
2355         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2356         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2357         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2358         NFSGETATTR_ATTRBIT(&attrbits);
2359         (void) nfsrv_putattrbit(nd, &attrbits);
2360         /* Get the directory's post-op attributes. */
2361         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2362         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2363         (void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2364         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2365         *tl = txdr_unsigned(NFSV4OP_GETATTR);
2366         (void) nfsrv_putattrbit(nd, &attrbits);
2367         error = nfscl_request(nd, dvp, p, cred, dstuff);
2368         if (error)
2369                 return (error);
             /* NOTE(review): presumably advances the open owner's seqid; macro is defined elsewhere. */
2370         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2371         if (nd->nd_repstat == 0) {
2372                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2373                     6 * NFSX_UNSIGNED);
2374                 stateid.seqid = *tl++;
2375                 stateid.other[0] = *tl++;
2376                 stateid.other[1] = *tl++;
2377                 stateid.other[2] = *tl;
                     /*
                      * tl still points at the fourth stateid word, so the
                      * result flags are read six words further on.  Assuming
                      * NFSX_STATEID covers the four words read above, that is
                      * the last word dissected.
                      */
2378                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
2379                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2380                 if (error)
2381                         goto nfsmout;
2382                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2383                 deleg = fxdr_unsigned(int, *tl);
2384                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
2385                     deleg == NFSV4OPEN_DELEGATEWRITE) {
                             /* Set both flags the first time a delegation is seen. */
2386                         if (!(owp->nfsow_clp->nfsc_flags &
2387                               NFSCLFLAGS_FIRSTDELEG))
2388                                 owp->nfsow_clp->nfsc_flags |=
2389                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
                             /*
                              * Allocated with M_WAITOK and used without a NULL
                              * check.  From here on every error exit goes via
                              * nfsmout, which frees dp.
                              */
2390                         dp = malloc(
2391                             sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2392                             M_NFSCLDELEG, M_WAITOK);
2393                         LIST_INIT(&dp->nfsdl_owner);
2394                         LIST_INIT(&dp->nfsdl_lock);
2395                         dp->nfsdl_clp = owp->nfsow_clp;
2396                         newnfs_copyincred(cred, &dp->nfsdl_cred);
2397                         nfscl_lockinit(&dp->nfsdl_rwlock);
2398                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2399                             NFSX_UNSIGNED);
2400                         dp->nfsdl_stateid.seqid = *tl++;
2401                         dp->nfsdl_stateid.other[0] = *tl++;
2402                         dp->nfsdl_stateid.other[1] = *tl++;
2403                         dp->nfsdl_stateid.other[2] = *tl++;
                             /* Word after the stateid; non-zero sets NFSCLDL_RECALL below. */
2404                         ret = fxdr_unsigned(int, *tl);
2405                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2406                                 dp->nfsdl_flags = NFSCLDL_WRITE;
2407                                 /*
2408                                  * Indicates how much the file can grow.
2409                                  */
2410                                 NFSM_DISSECT(tl, u_int32_t *,
2411                                     3 * NFSX_UNSIGNED);
2412                                 limitby = fxdr_unsigned(int, *tl++);
2413                                 switch (limitby) {
2414                                 case NFSV4OPEN_LIMITSIZE:
                                             /* The two remaining words form one 64-bit size. */
2415                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
2416                                         break;
2417                                 case NFSV4OPEN_LIMITBLOCKS:
                                             /* The limit is the product of the two remaining words. */
2418                                         dp->nfsdl_sizelimit =
2419                                             fxdr_unsigned(u_int64_t, *tl++);
2420                                         dp->nfsdl_sizelimit *=
2421                                             fxdr_unsigned(u_int64_t, *tl);
2422                                         break;
2423                                 default:
2424                                         error = NFSERR_BADXDR;
2425                                         goto nfsmout;
2426                                 }
2427                         } else {
2428                                 dp->nfsdl_flags = NFSCLDL_READ;
2429                         }
2430                         if (ret)
2431                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
                             /* ret and acesize are overwritten here; neither is read afterwards. */
2432                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, false,
2433                             &ret, &acesize, p);
2434                         if (error)
2435                                 goto nfsmout;
2436                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
                             /* Any other delegation type is a malformed reply. */
2437                         error = NFSERR_BADXDR;
2438                         goto nfsmout;
2439                 }
2440                 error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2441                 if (error)
2442                         goto nfsmout;
2443                 /* Get rid of the PutFH and Getattr status values. */
2444                 NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2445                 /* Load the directory attributes. */
2446                 error = nfsm_loadattr(nd, dnap);
2447                 if (error)
2448                         goto nfsmout;
2449                 *dattrflagp = 1;
                     /* Record the new file's change and mtime only if its attributes were returned. */
2450                 if (dp != NULL && *attrflagp) {
2451                         dp->nfsdl_change = nnap->na_filerev;
2452                         dp->nfsdl_modtime = nnap->na_mtime;
2453                         dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2454                 }
2455                 /*
2456                  * We can now complete the Open state.
2457                  */
2458                 nfhp = *nfhpp;
2459                 if (dp != NULL) {
                             /* Copy the file handle into the delegation structure. */
2460                         dp->nfsdl_fhlen = nfhp->nfh_len;
2461                         NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2462                 }
2463                 /*
2464                  * Get an Open structure that will be
2465                  * attached to the OpenOwner, acquired already.
2466                  */
2467                 error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
2468                     (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2469                     cred, p, NULL, &op, &newone, NULL, 0, false);
2470                 if (error)
2471                         goto nfsmout;
2472                 op->nfso_stateid = stateid;
2473                 newnfs_copyincred(cred, &op->nfso_cred);
2474                 if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
                         /* Repeat the OpenConfirm for as long as it returns NFSERR_DELAY. */
2475                     do {
2476                         ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2477                             nfhp->nfh_len, op, cred, p);
2478                         if (ret == NFSERR_DELAY)
2479                             (void) nfs_catnap(PZERO, ret, "nfs_create");
2480                     } while (ret == NFSERR_DELAY);
2481                     error = ret;
2482                 }
2483
2484                 /*
2485                  * If the server is handing out delegations, but we didn't
2486                  * get one because an OpenConfirm was required, try the
2487                  * Open again, to get a delegation. This is a harmless no-op,
2488                  * from a server's point of view.
2489                  */
2490                 if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2491                     (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2492                     !error && dp == NULL) {
2493                     do {
2494                         ret = nfsrpc_openrpc(VFSTONFS(dvp->v_mount), dvp,
2495                             np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2496                             nfhp->nfh_fh, nfhp->nfh_len,
2497                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2498                             name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2499                         if (ret == NFSERR_DELAY)
2500                             (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2501                     } while (ret == NFSERR_DELAY);
2502                     if (ret) {
                             /*
                              * The second Open is best effort.  Drop any
                              * delegation it left behind and report only the
                              * three errors listed below.
                              */
2503                         if (dp != NULL) {
2504                                 free(dp, M_NFSCLDELEG);
2505                                 dp = NULL;
2506                         }
2507                         if (ret == NFSERR_STALECLIENTID ||
2508                             ret == NFSERR_STALEDONTRECOVER ||
2509                             ret == NFSERR_BADSESSION)
2510                                 error = ret;
2511                     }
2512                 }
2513                 nfscl_openrelease(nmp, op, error, newone);
2514                 *unlockedp = 1;
2515         }
             /* A local error takes precedence over the server's status. */
2516         if (nd->nd_repstat != 0 && error == 0)
2517                 error = nd->nd_repstat;
2518         if (error == NFSERR_STALECLIENTID)
2519                 nfscl_initiate_recovery(owp->nfsow_clp);
2520 nfsmout:
             /* Hand the delegation to the caller only on success. */
2521         if (!error)
2522                 *dpp = dp;
2523         else if (dp != NULL)
2524                 free(dp, M_NFSCLDELEG);
2525         m_freem(nd->nd_mrep);
2526         return (error);
2527 }
2528
2529 /*
2530  * Nfs remove rpc
2531  */
2532 int
2533 nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2534     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp,
2535     void *dstuff)
2536 {
2537         u_int32_t *tl;
2538         struct nfsrv_descript nfsd, *nd = &nfsd;
2539         struct nfsnode *np;
2540         struct nfsmount *nmp;
2541         nfsv4stateid_t dstateid;
2542         int error, ret = 0, i;
2543
2544         *dattrflagp = 0;
2545         if (namelen > NFS_MAXNAMLEN)
2546                 return (ENAMETOOLONG);
2547         nmp = VFSTONFS(dvp->v_mount);
2548 tryagain:
2549         if (NFSHASNFSV4(nmp) && ret == 0) {
2550                 ret = nfscl_removedeleg(vp, p, &dstateid);
2551                 if (ret == 1) {
2552                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp, cred);
2553                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2554                             NFSX_UNSIGNED);
2555                         if (NFSHASNFSV4N(nmp))
2556                                 *tl++ = 0;
2557                         else
2558                                 *tl++ = dstateid.seqid;
2559                         *tl++ = dstateid.other[0];
2560                         *tl++ = dstateid.other[1];
2561                         *tl++ = dstateid.other[2];
2562                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2563                         np = VTONFS(dvp);
2564                         (void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
2565                             np->n_fhp->nfh_len, 0);
2566                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2567                         *tl = txdr_unsigned(NFSV4OP_REMOVE);
2568                 }
2569         } else {
2570                 ret = 0;
2571         }
2572         if (ret == 0)
2573                 NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp, cred);
2574         (void) nfsm_strtom(nd, name, namelen);
2575         error = nfscl_request(nd, dvp, p, cred, dstuff);
2576         if (error)
2577                 return (error);
2578         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2579                 /* For NFSv4, parse out any Delereturn replies. */
2580                 if (ret > 0 && nd->nd_repstat != 0 &&
2581                     (nd->nd_flag & ND_NOMOREDATA)) {
2582                         /*
2583                          * If the Delegreturn failed, try again without
2584                          * it. The server will Recall, as required.
2585                          */
2586                         m_freem(nd->nd_mrep);
2587                         goto tryagain;
2588                 }
2589                 for (i = 0; i < (ret * 2); i++) {
2590                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2591                             ND_NFSV4) {
2592                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2593                             if (*(tl + 1))
2594                                 nd->nd_flag |= ND_NOMOREDATA;
2595                         }
2596                 }
2597                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2598         }
2599         if (nd->nd_repstat && !error)
2600                 error = nd->nd_repstat;
2601 nfsmout:
2602         m_freem(nd->nd_mrep);
2603         return (error);
2604 }
2605
2606 /*
2607  * Do an nfs rename rpc.
2608  */
2609 int
2610 nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
2611     vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
2612     NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
2613     int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff)
2614 {
2615         u_int32_t *tl;
2616         struct nfsrv_descript nfsd, *nd = &nfsd;
2617         struct nfsmount *nmp;
2618         struct nfsnode *np;
2619         nfsattrbit_t attrbits;
2620         nfsv4stateid_t fdstateid, tdstateid;
2621         int error = 0, ret = 0, gottd = 0, gotfd = 0, i;
2622
2623         *fattrflagp = 0;
2624         *tattrflagp = 0;
2625         nmp = VFSTONFS(fdvp->v_mount);
2626         if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
2627                 return (ENAMETOOLONG);
2628 tryagain:
2629         if (NFSHASNFSV4(nmp) && ret == 0) {
2630                 ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
2631                     &tdstateid, &gottd, p);
2632                 if (gotfd && gottd) {
2633                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp, cred);
2634                 } else if (gotfd) {
2635                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp, cred);
2636                 } else if (gottd) {
2637                         NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp, cred);
2638                 }
2639                 if (gotfd) {
2640                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2641                         if (NFSHASNFSV4N(nmp))
2642                                 *tl++ = 0;
2643                         else
2644                                 *tl++ = fdstateid.seqid;
2645                         *tl++ = fdstateid.other[0];
2646                         *tl++ = fdstateid.other[1];
2647                         *tl = fdstateid.other[2];
2648                         if (gottd) {
2649                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2650                                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2651                                 np = VTONFS(tvp);
2652                                 (void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
2653                                     np->n_fhp->nfh_len, 0);
2654                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2655                                 *tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
2656                         }
2657                 }
2658                 if (gottd) {
2659                         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
2660                         if (NFSHASNFSV4N(nmp))
2661                                 *tl++ = 0;
2662                         else
2663                                 *tl++ = tdstateid.seqid;
2664                         *tl++ = tdstateid.other[0];
2665                         *tl++ = tdstateid.other[1];
2666                         *tl = tdstateid.other[2];
2667                 }
2668                 if (ret > 0) {
2669                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2670                         *tl = txdr_unsigned(NFSV4OP_PUTFH);
2671                         np = VTONFS(fdvp);
2672                         (void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
2673                             np->n_fhp->nfh_len, 0);
2674                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2675                         *tl = txdr_unsigned(NFSV4OP_SAVEFH);
2676                 }
2677         } else {
2678                 ret = 0;
2679         }
2680         if (ret == 0)
2681                 NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp, cred);
2682         if (nd->nd_flag & ND_NFSV4) {
2683                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2684                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2685                 NFSWCCATTR_ATTRBIT(&attrbits);
2686                 (void) nfsrv_putattrbit(nd, &attrbits);
2687                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2688                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2689                 (void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2690                     VTONFS(tdvp)->n_fhp->nfh_len, 0);
2691                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2692                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2693                 (void) nfsrv_putattrbit(nd, &attrbits);
2694                 nd->nd_flag |= ND_V4WCCATTR;
2695                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2696                 *tl = txdr_unsigned(NFSV4OP_RENAME);
2697         }
2698         (void) nfsm_strtom(nd, fnameptr, fnamelen);
2699         if (!(nd->nd_flag & ND_NFSV4))
2700                 (void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
2701                         VTONFS(tdvp)->n_fhp->nfh_len, 0);
2702         (void) nfsm_strtom(nd, tnameptr, tnamelen);
2703         error = nfscl_request(nd, fdvp, p, cred, fstuff);
2704         if (error)
2705                 return (error);
2706         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2707                 /* For NFSv4, parse out any Delegreturn replies. */
2708                 if (ret > 0 && nd->nd_repstat != 0 &&
2709                     (nd->nd_flag & ND_NOMOREDATA)) {
2710                         /*
2711                          * If the Delegreturn failed, try again without
2712                          * it. The server will Recall, as required.
2713                          */
2714                         m_freem(nd->nd_mrep);
2715                         goto tryagain;
2716                 }
2717                 for (i = 0; i < (ret * 2); i++) {
2718                         if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2719                             ND_NFSV4) {
2720                             NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2721                             if (*(tl + 1)) {
2722                                 if (i == 0 && ret > 1) {
2723                                     /*
2724                                      * If the Delegreturn failed, try again
2725                                      * without it. The server will Recall, as
2726                                      * required.
2727                                      * If ret > 1, the first iteration of this
2728                                      * loop is the second DelegReturn result.
2729                                      */
2730                                     m_freem(nd->nd_mrep);
2731                                     goto tryagain;
2732                                 } else {
2733                                     nd->nd_flag |= ND_NOMOREDATA;
2734                                 }
2735                             }
2736                         }
2737                 }
2738                 /* Now, the first wcc attribute reply. */
2739                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2740                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2741                         if (*(tl + 1))
2742                                 nd->nd_flag |= ND_NOMOREDATA;
2743                 }
2744                 error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL, NULL);
2745                 /* and the second wcc attribute reply. */
2746                 if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
2747                     !error) {
2748                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2749                         if (*(tl + 1))
2750                                 nd->nd_flag |= ND_NOMOREDATA;
2751                 }
2752                 if (!error)
2753                         error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
2754                             NULL, NULL);
2755         }
2756         if (nd->nd_repstat && !error)
2757                 error = nd->nd_repstat;
2758 nfsmout:
2759         m_freem(nd->nd_mrep);
2760         return (error);
2761 }
2762
2763 /*
2764  * nfs hard link create rpc
      *
      * Builds a LINK request that is started against vp, carries the file
      * handle of dvp followed by name/namelen, and is sent via nfscl_request().
      *
      * Arguments, as used by the code below:
      *   dvp        - vnode whose file handle is put in the request and whose
      *                wcc data is parsed from the reply (the directory, going
      *                by the name).
      *   vp         - vnode the request is started and issued against.
      *   name,
      *   namelen    - name appended to the request; namelen greater than
      *                NFS_MAXNAMLEN is rejected before anything is built.
      *   cred, p,
      *   dstuff     - passed through to the request helpers.
      *   dnap,
      *   dattrflagp - receive the result of nfscl_wcc_data() for dvp.
      *   nap,
      *   attrflagp  - receive the result of nfscl_postop_attr(); only parsed
      *                for NFSv3 replies.
      *
      * Both *attrflagp and *dattrflagp are cleared on entry.
      *
      * Returns ENAMETOOLONG for an over-long name, the nfscl_request() error
      * if sending failed, otherwise the first reply parsing error or, failing
      * that, nd->nd_repstat.
2765  */
2766 int
2767 nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
2768     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2769     struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff)
2770 {
2771         u_int32_t *tl;
2772         struct nfsrv_descript nfsd, *nd = &nfsd;
2773         nfsattrbit_t attrbits;
2774         int error = 0;
2775
2776         *attrflagp = 0;
2777         *dattrflagp = 0;
2778         if (namelen > NFS_MAXNAMLEN)
2779                 return (ENAMETOOLONG);
2780         NFSCL_REQSTART(nd, NFSPROC_LINK, vp, cred);
             /* For NFSv4, a PutFH operation word precedes dvp's file handle. */
2781         if (nd->nd_flag & ND_NFSV4) {
2782                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2783                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2784         }
2785         (void)nfsm_fhtom(VFSTONFS(dvp->v_mount), nd, VTONFS(dvp)->n_fhp->nfh_fh,
2786                 VTONFS(dvp)->n_fhp->nfh_len, 0);
             /*
              * For NFSv4, request the wcc attribute bits with a Getattr, mark
              * the descriptor with ND_V4WCCATTR and then add the Link operation
              * word, so the name below becomes the Link argument.
              */
2787         if (nd->nd_flag & ND_NFSV4) {
2788                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2789                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2790                 NFSWCCATTR_ATTRBIT(&attrbits);
2791                 (void) nfsrv_putattrbit(nd, &attrbits);
2792                 nd->nd_flag |= ND_V4WCCATTR;
2793                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2794                 *tl = txdr_unsigned(NFSV4OP_LINK);
2795         }
2796         (void) nfsm_strtom(nd, name, namelen);
2797         error = nfscl_request(nd, vp, p, cred, dstuff);
             /* This path returns directly, skipping the m_freem() at nfsmout. */
2798         if (error)
2799                 return (error);
2800         if (nd->nd_flag & ND_NFSV3) {
                     /* NFSv3: post-op attributes first, then wcc data for dvp. */
2801                 error = nfscl_postop_attr(nd, nap, attrflagp, dstuff);
2802                 if (!error)
2803                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2804                             NULL, NULL);
2805         } else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2806                 /*
2807                  * First, parse out the PutFH and Getattr result.
                      * Two words are dissected at a time and the second word
                      * of the pair is tested: when it is zero the next pair
                      * is dissected as well, and when the last pair read has
                      * a non-zero second word ND_NOMOREDATA is set.
2808                  */
2809                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2810                 if (!(*(tl + 1)))
2811                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2812                 if (*(tl + 1))
2813                         nd->nd_flag |= ND_NOMOREDATA;
2814                 /*
2815                  * Get the pre-op attributes.
2816                  */
2817                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2818         }
             /* A parsing error takes precedence over the reply status. */
2819         if (nd->nd_repstat && !error)
2820                 error = nd->nd_repstat;
             /*
              * NOTE(review): no goto to this label is visible in the function;
              * presumably NFSM_DISSECT branches here on failure -- confirm
              * against the macro definition.
              */
2821 nfsmout:
2822         m_freem(nd->nd_mrep);
2823         return (error);
2824 }
2825
2826 /*
2827  * nfs symbolic link create rpc
      *
      * Builds a SYMLINK request against directory vnode dvp for the entry
      * name/namelen whose link contents are the NUL-terminated string target,
      * and sends it via nfscl_request().
      *
      * The order of the arguments placed in the request depends on the
      * protocol version flagged in nd->nd_flag:
      *   NFSv4: NFLNK type word, target, name, attributes
      *   NFSv3: name, attributes, target
      *   NFSv2: name, target, attributes (filled with NFSSATTR_SIZENEG1)
      *
      * On entry *nfhpp is set to NULL and *attrflagp and *dattrflagp to 0.
      * For an NFSv3 reply with a zero nd_repstat, nfscl_mtofh() is used to
      * fill in *nfhpp, nnap and *attrflagp.  For NFSv3 and NFSv4 replies,
      * nfscl_wcc_data() fills in dnap and *dattrflagp for dvp.
      *
      * Returns ENAMETOOLONG if target is longer than NFS_MAXPATHLEN or
      * namelen exceeds NFS_MAXNAMLEN, the nfscl_request() error if sending
      * failed, otherwise the first parsing error or nd->nd_repstat.  EEXIST
      * may be mapped to 0; see the comment near the end of the function.
2828  */
2829 int
2830 nfsrpc_symlink(vnode_t dvp, char *name, int namelen, const char *target,
2831     struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2832     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2833     int *dattrflagp, void *dstuff)
2834 {
2835         u_int32_t *tl;
2836         struct nfsrv_descript nfsd, *nd = &nfsd;
2837         struct nfsmount *nmp;
2838         int slen, error = 0;
2839
2840         *nfhpp = NULL;
2841         *attrflagp = 0;
2842         *dattrflagp = 0;
2843         nmp = VFSTONFS(dvp->v_mount);
2844         slen = strlen(target);
2845         if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
2846                 return (ENAMETOOLONG);
2847         NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp, cred);
             /* NFSv4 puts the object type and the link target ahead of the name. */
2848         if (nd->nd_flag & ND_NFSV4) {
2849                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2850                 *tl = txdr_unsigned(NFLNK);
2851                 (void) nfsm_strtom(nd, target, slen);
2852         }
2853         (void) nfsm_strtom(nd, name, namelen);
2854         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2855                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
             /* NFSv2 and NFSv3 carry the target after the name instead. */
2856         if (!(nd->nd_flag & ND_NFSV4))
2857                 (void) nfsm_strtom(nd, target, slen);
2858         if (nd->nd_flag & ND_NFSV2)
2859                 nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2860         error = nfscl_request(nd, dvp, p, cred, dstuff);
             /* This path returns directly, without the m_freem() done below. */
2861         if (error)
2862                 return (error);
2863         if (nd->nd_flag & ND_NFSV4)
2864                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2865         if ((nd->nd_flag & ND_NFSV3) && !error) {
                     /* The new file handle is only parsed when the RPC succeeded. */
2866                 if (!nd->nd_repstat)
2867                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2868                 if (!error)
2869                         error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2870                             NULL, NULL);
2871         }
             /* A parsing error takes precedence over the reply status. */
2872         if (nd->nd_repstat && !error)
2873                 error = nd->nd_repstat;
2874         m_freem(nd->nd_mrep);
2875         /*
2876          * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2877          * Only do this if vfs.nfs.ignore_eexist is set.
2878          * Never do this for NFSv4.1 or later minor versions, since sessions
2879          * should guarantee "exactly once" RPC semantics.
2880          */
2881         if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2882             nmp->nm_minorvers == 0))
2883                 error = 0;
2884         return (error);
2885 }
2886
2887 /*
2888  * nfs make dir rpc
      *
      * Builds a MKDIR request against directory vnode dvp for the entry
      * name/namelen with the attributes in vap (filled in with
      * NFSSATTR_SIZENEG1) and sends it via nfscl_request().
      *
      * For NFSv4 the request starts with an NFDIR type word and is followed
      * by GetFH and Getattr operations, then a PutFH of dvp's file handle and
      * a second Getattr, so that the reply carries both the new object's
      * handle/attributes and the directory's attributes.
      *
      * On entry *nfhpp is set to NULL and *attrflagp and *dattrflagp to 0.
      * When the reply status is zero, nfscl_mtofh() fills in *nfhpp, nnap and
      * *attrflagp.  dnap and *dattrflagp are filled in for dvp, either by
      * nfscl_wcc_data() or, for NFSv4, additionally by nfsm_loadattr().
      *
      * Returns ENAMETOOLONG if namelen exceeds NFS_MAXNAMLEN, the
      * nfscl_request() error if sending failed, otherwise the first parsing
      * error or nd->nd_repstat.  EEXIST may be mapped to 0; see the comment
      * near the end of the function.
2889  */
2890 int
2891 nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2892     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2893     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2894     int *dattrflagp, void *dstuff)
2895 {
2896         u_int32_t *tl;
2897         struct nfsrv_descript nfsd, *nd = &nfsd;
2898         nfsattrbit_t attrbits;
2899         int error = 0;
2900         struct nfsfh *fhp;
2901         struct nfsmount *nmp;
2902
2903         *nfhpp = NULL;
2904         *attrflagp = 0;
2905         *dattrflagp = 0;
2906         nmp = VFSTONFS(dvp->v_mount);
2907         fhp = VTONFS(dvp)->n_fhp;
2908         if (namelen > NFS_MAXNAMLEN)
2909                 return (ENAMETOOLONG);
2910         NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp, cred);
             /* NFSv4 only: the object type word goes ahead of the name. */
2911         if (nd->nd_flag & ND_NFSV4) {
2912                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2913                 *tl = txdr_unsigned(NFDIR);
2914         }
2915         (void) nfsm_strtom(nd, name, namelen);
2916         nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
             /*
              * NFSv4 only: append GetFH and Getattr, then PutFH of dvp's file
              * handle and another Getattr using the same attribute bits.
              */
2917         if (nd->nd_flag & ND_NFSV4) {
2918                 NFSGETATTR_ATTRBIT(&attrbits);
2919                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2920                 *tl++ = txdr_unsigned(NFSV4OP_GETFH);
2921                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2922                 (void) nfsrv_putattrbit(nd, &attrbits);
2923                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2924                 *tl = txdr_unsigned(NFSV4OP_PUTFH);
2925                 (void)nfsm_fhtom(nmp, nd, fhp->nfh_fh, fhp->nfh_len, 0);
2926                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2927                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
2928                 (void) nfsrv_putattrbit(nd, &attrbits);
2929         }
2930         error = nfscl_request(nd, dvp, p, cred, dstuff);
             /* This path returns directly, skipping the m_freem() at nfsmout. */
2931         if (error)
2932                 return (error);
2933         if (nd->nd_flag & ND_NFSV4)
2934                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2935         if (!nd->nd_repstat && !error) {
2936                 if (nd->nd_flag & ND_NFSV4) {
                             /*
                              * NOTE(review): five words are skipped ahead of
                              * an attribute bitmap whose contents are read
                              * into attrbits; what those words hold is not
                              * shown here -- confirm against the reply layout.
                              */
2937                         NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2938                         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2939                 }
2940                 if (!error)
2941                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2942                 if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
2943                         /* Get rid of the PutFH and Getattr status values. */
2944                         NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2945                         /* Load the directory attributes. */
2946                         error = nfsm_loadattr(nd, dnap);
2947                         if (error == 0)
2948                                 *dattrflagp = 1;
2949                 }
2950         }
2951         if ((nd->nd_flag & ND_NFSV3) && !error)
2952                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
             /* A parsing error takes precedence over the reply status. */
2953         if (nd->nd_repstat && !error)
2954                 error = nd->nd_repstat;
             /*
              * NOTE(review): no goto to this label is visible in the function;
              * presumably NFSM_DISSECT branches here on failure -- confirm
              * against the macro definition.
              */
2955 nfsmout:
2956         m_freem(nd->nd_mrep);
2957         /*
2958          * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2959          * Only do this if vfs.nfs.ignore_eexist is set.
2960          * Never do this for NFSv4.1 or later minor versions, since sessions
2961          * should guarantee "exactly once" RPC semantics.
2962          */
2963         if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2964             nmp->nm_minorvers == 0))
2965                 error = 0;
2966         return (error);
2967 }
2968
2969 /*
2970  * nfs remove directory call
      *
      * Builds an RMDIR request against directory vnode dvp whose only
      * explicit argument is name/namelen, and sends it via nfscl_request().
      *
      * *dattrflagp is cleared on entry.  For NFSv3 and NFSv4 replies,
      * nfscl_wcc_data() fills in dnap and *dattrflagp for dvp.
      *
      * Returns ENAMETOOLONG if namelen exceeds NFS_MAXNAMLEN, the
      * nfscl_request() error if sending failed, otherwise the wcc parsing
      * error or nd->nd_repstat.  An ENOENT result is reported as success;
      * see the comment near the end of the function.
2971  */
2972 int
2973 nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
2974     NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff)
2975 {
2976         struct nfsrv_descript nfsd, *nd = &nfsd;
2977         int error = 0;
2978
2979         *dattrflagp = 0;
2980         if (namelen > NFS_MAXNAMLEN)
2981                 return (ENAMETOOLONG);
2982         NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp, cred);
2983         (void) nfsm_strtom(nd, name, namelen);
2984         error = nfscl_request(nd, dvp, p, cred, dstuff);
             /* This path returns directly, without the m_freem() done below. */
2985         if (error)
2986                 return (error);
             /* No wcc data is parsed when neither version flag is set. */
2987         if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2988                 error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
             /* A parsing error takes precedence over the reply status. */
2989         if (nd->nd_repstat && !error)
2990                 error = nd->nd_repstat;
2991         m_freem(nd->nd_mrep);
2992         /*
2993          * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2994          */
2995         if (error == ENOENT)
2996                 error = 0;
2997         return (error);
2998 }
2999
3000 /*
3001  * Readdir rpc.
3002  * Always returns with either uio_resid unchanged, if you are at the
3003  * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
3004  * filled in.
3005  * I felt this would allow caching of directory blocks more easily
3006  * than returning a partially filled block.
3007  * Directory offset cookies:
3008  * Oh my, what to do with them...
3009  * I can think of three ways to deal with them:
3010  * 1 - have the layer above these RPCs maintain a map between logical
3011  *     directory byte offsets and the NFS directory offset cookies
3012  * 2 - pass the opaque directory offset cookies up into userland
3013  *     and let the libc functions deal with them, via the system call
3014  * 3 - return them to userland in the "struct dirent", so future versions
3015  *     of libc can use them and do whatever is necessary to make things work
3016  *     above these rpc calls, in the meantime
3017  * For now, I do #3 by "hiding" the directory offset cookies after the
3018  * d_name field in struct dirent. This is space inside d_reclen that
3019  * will be ignored by anything that doesn't know about them.
3020  * The directory offset cookies are filled in as the last 8 bytes of
3021  * each directory entry, after d_name. Someday, the userland libc
3022  * functions may be able to use these. In the meantime, it satisfies
3023  * OpenBSD's requirements for cookies being returned.
3024  * It expects the directory offset cookie for the read to be in uio_offset
3025  * and returns the one for the next entry after this directory block in
3026  * there, as well.
3027  */
3028 int
3029 nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3030     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3031     int *eofp, void *stuff)
3032 {
3033         int len, left;
3034         struct dirent *dp = NULL;
3035         u_int32_t *tl;
3036         nfsquad_t cookie, ncookie;
3037         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3038         struct nfsnode *dnp = VTONFS(vp);
3039         struct nfsvattr nfsva;
3040         struct nfsrv_descript nfsd, *nd = &nfsd;
3041         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3042         int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
3043         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3044         char *cp;
3045         nfsattrbit_t attrbits, dattrbits;
3046         u_int32_t rderr, *tl2 = NULL;
3047         size_t tresid;
3048
3049         KASSERT(uiop->uio_iovcnt == 1 &&
3050             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3051             ("nfs readdirrpc bad uio"));
3052         ncookie.lval[0] = ncookie.lval[1] = 0;
3053         /*
3054          * There is no point in reading a lot more than uio_resid, however
3055          * adding one additional DIRBLKSIZ makes sense. Since uio_resid
3056          * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
3057          * will never make readsize > nm_readdirsize.
3058          */
3059         readsize = nmp->nm_readdirsize;
3060         if (readsize > uiop->uio_resid)
3061                 readsize = uiop->uio_resid + DIRBLKSIZ;
3062
3063         *attrflagp = 0;
3064         if (eofp)
3065                 *eofp = 0;
3066         tresid = uiop->uio_resid;
3067         cookie.lval[0] = cookiep->nfsuquad[0];
3068         cookie.lval[1] = cookiep->nfsuquad[1];
3069         nd->nd_mrep = NULL;
3070
3071         /*
3072          * For NFSv4, first create the "." and ".." entries.
3073          */
3074         if (NFSHASNFSV4(nmp)) {
3075                 reqsize = 6 * NFSX_UNSIGNED;
3076                 NFSGETATTR_ATTRBIT(&dattrbits);
3077                 NFSZERO_ATTRBIT(&attrbits);
3078                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3079                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
3080                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3081                     NFSATTRBIT_MOUNTEDONFILEID)) {
3082                         NFSSETBIT_ATTRBIT(&attrbits,
3083                             NFSATTRBIT_MOUNTEDONFILEID);
3084                         gotmnton = 1;
3085                 } else {
3086                         /*
3087                          * Must fake it. Use the fileno, except when the
3088                          * fsid is != to that of the directory. For that
3089                          * case, generate a fake fileno that is not the same.
3090                          */
3091                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3092                         gotmnton = 0;
3093                 }
3094
3095                 /*
3096                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3097                  */
3098                 if (uiop->uio_offset == 0) {
3099                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp, cred);
3100                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3101                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
3102                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3103                         (void) nfsrv_putattrbit(nd, &attrbits);
3104                         error = nfscl_request(nd, vp, p, cred, stuff);
3105                         if (error)
3106                             return (error);
3107                         dotfileid = 0;  /* Fake out the compiler. */
3108                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3109                             error = nfsm_loadattr(nd, &nfsva);
3110                             if (error != 0)
3111                                 goto nfsmout;
3112                             dotfileid = nfsva.na_fileid;
3113                         }
3114                         if (nd->nd_repstat == 0) {
3115                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3116                             len = fxdr_unsigned(int, *(tl + 4));
3117                             if (len > 0 && len <= NFSX_V4FHMAX)
3118                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3119                             else
3120                                 error = EPERM;
3121                             if (!error) {
3122                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3123                                 nfsva.na_mntonfileno = UINT64_MAX;
3124                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3125                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3126                                     NULL, NULL, NULL, p, cred);
3127                                 if (error) {
3128                                     dotdotfileid = dotfileid;
3129                                 } else if (gotmnton) {
3130                                     if (nfsva.na_mntonfileno != UINT64_MAX)
3131                                         dotdotfileid = nfsva.na_mntonfileno;
3132                                     else
3133                                         dotdotfileid = nfsva.na_fileid;
3134                                 } else if (nfsva.na_filesid[0] ==
3135                                     dnp->n_vattr.na_filesid[0] &&
3136                                     nfsva.na_filesid[1] ==
3137                                     dnp->n_vattr.na_filesid[1]) {
3138                                     dotdotfileid = nfsva.na_fileid;
3139                                 } else {
3140                                     do {
3141                                         fakefileno--;
3142                                     } while (fakefileno ==
3143                                         nfsva.na_fileid);
3144                                     dotdotfileid = fakefileno;
3145                                 }
3146                             }
3147                         } else if (nd->nd_repstat == NFSERR_NOENT) {
3148                             /*
3149                              * Lookupp returns NFSERR_NOENT when we are
3150                              * at the root, so just use the current dir.
3151                              */
3152                             nd->nd_repstat = 0;
3153                             dotdotfileid = dotfileid;
3154                         } else {
3155                             error = nd->nd_repstat;
3156                         }
3157                         m_freem(nd->nd_mrep);
3158                         if (error)
3159                             return (error);
3160                         nd->nd_mrep = NULL;
3161                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3162                         dp->d_pad0 = dp->d_pad1 = 0;
3163                         dp->d_off = 0;
3164                         dp->d_type = DT_DIR;
3165                         dp->d_fileno = dotfileid;
3166                         dp->d_namlen = 1;
3167                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
3168                         dp->d_name[0] = '.';
3169                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3170                         /*
3171                          * Just make these offset cookie 0.
3172                          */
3173                         tl = (u_int32_t *)&dp->d_name[8];
3174                         *tl++ = 0;
3175                         *tl = 0;
3176                         blksiz += dp->d_reclen;
3177                         uiop->uio_resid -= dp->d_reclen;
3178                         uiop->uio_offset += dp->d_reclen;
3179                         uiop->uio_iov->iov_base =
3180                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3181                         uiop->uio_iov->iov_len -= dp->d_reclen;
3182                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3183                         dp->d_pad0 = dp->d_pad1 = 0;
3184                         dp->d_off = 0;
3185                         dp->d_type = DT_DIR;
3186                         dp->d_fileno = dotdotfileid;
3187                         dp->d_namlen = 2;
3188                         *((uint64_t *)dp->d_name) = 0;
3189                         dp->d_name[0] = '.';
3190                         dp->d_name[1] = '.';
3191                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3192                         /*
3193                          * Just make these offset cookie 0.
3194                          */
3195                         tl = (u_int32_t *)&dp->d_name[8];
3196                         *tl++ = 0;
3197                         *tl = 0;
3198                         blksiz += dp->d_reclen;
3199                         uiop->uio_resid -= dp->d_reclen;
3200                         uiop->uio_offset += dp->d_reclen;
3201                         uiop->uio_iov->iov_base =
3202                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3203                         uiop->uio_iov->iov_len -= dp->d_reclen;
3204                 }
3205                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
3206         } else {
3207                 reqsize = 5 * NFSX_UNSIGNED;
3208         }
3209
3210         /*
3211          * Loop around doing readdir rpc's of size readsize.
3212          * The stopping criteria is EOF or buffer full.
3213          */
3214         while (more_dirs && bigenough) {
3215                 *attrflagp = 0;
3216                 NFSCL_REQSTART(nd, NFSPROC_READDIR, vp, cred);
3217                 if (nd->nd_flag & ND_NFSV2) {
3218                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3219                         *tl++ = cookie.lval[1];
3220                         *tl = txdr_unsigned(readsize);
3221                 } else {
3222                         NFSM_BUILD(tl, u_int32_t *, reqsize);
3223                         *tl++ = cookie.lval[0];
3224                         *tl++ = cookie.lval[1];
3225                         if (cookie.qval == 0) {
3226                                 *tl++ = 0;
3227                                 *tl++ = 0;
3228                         } else {
3229                                 NFSLOCKNODE(dnp);
3230                                 *tl++ = dnp->n_cookieverf.nfsuquad[0];
3231                                 *tl++ = dnp->n_cookieverf.nfsuquad[1];
3232                                 NFSUNLOCKNODE(dnp);
3233                         }
3234                         if (nd->nd_flag & ND_NFSV4) {
3235                                 *tl++ = txdr_unsigned(readsize);
3236                                 *tl = txdr_unsigned(readsize);
3237                                 (void) nfsrv_putattrbit(nd, &attrbits);
3238                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3239                                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
3240                                 (void) nfsrv_putattrbit(nd, &dattrbits);
3241                         } else {
3242                                 *tl = txdr_unsigned(readsize);
3243                         }
3244                 }
3245                 error = nfscl_request(nd, vp, p, cred, stuff);
3246                 if (error)
3247                         return (error);
3248                 if (!(nd->nd_flag & ND_NFSV2)) {
3249                         if (nd->nd_flag & ND_NFSV3)
3250                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3251                                     stuff);
3252                         if (!nd->nd_repstat && !error) {
3253                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
3254                                 NFSLOCKNODE(dnp);
3255                                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3256                                 dnp->n_cookieverf.nfsuquad[1] = *tl;
3257                                 NFSUNLOCKNODE(dnp);
3258                         }
3259                 }
3260                 if (nd->nd_repstat || error) {
3261                         if (!error)
3262                                 error = nd->nd_repstat;
3263                         goto nfsmout;
3264                 }
3265                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3266                 more_dirs = fxdr_unsigned(int, *tl);
3267                 if (!more_dirs)
3268                         tryformoredirs = 0;
3269
3270                 /* loop through the dir entries, doctoring them to 4bsd form */
3271                 while (more_dirs && bigenough) {
3272                         if (nd->nd_flag & ND_NFSV4) {
3273                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3274                                 ncookie.lval[0] = *tl++;
3275                                 ncookie.lval[1] = *tl++;
3276                                 len = fxdr_unsigned(int, *tl);
3277                         } else if (nd->nd_flag & ND_NFSV3) {
3278                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3279                                 nfsva.na_fileid = fxdr_hyper(tl);
3280                                 tl += 2;
3281                                 len = fxdr_unsigned(int, *tl);
3282                         } else {
3283                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3284                                 nfsva.na_fileid = fxdr_unsigned(uint64_t,
3285                                     *tl++);
3286                                 len = fxdr_unsigned(int, *tl);
3287                         }
3288                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3289                                 error = EBADRPC;
3290                                 goto nfsmout;
3291                         }
3292                         tlen = roundup2(len, 8);
3293                         if (tlen == len)
3294                                 tlen += 8;  /* To ensure null termination. */
3295                         left = DIRBLKSIZ - blksiz;
3296                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3297                                 NFSBZERO(uiop->uio_iov->iov_base, left);
3298                                 dp->d_reclen += left;
3299                                 uiop->uio_iov->iov_base =
3300                                     (char *)uiop->uio_iov->iov_base + left;
3301                                 uiop->uio_iov->iov_len -= left;
3302                                 uiop->uio_resid -= left;
3303                                 uiop->uio_offset += left;
3304                                 blksiz = 0;
3305                         }
3306                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3307                             uiop->uio_resid)
3308                                 bigenough = 0;
3309                         if (bigenough) {
3310                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3311                                 dp->d_pad0 = dp->d_pad1 = 0;
3312                                 dp->d_off = 0;
3313                                 dp->d_namlen = len;
3314                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
3315                                     NFSX_HYPER;
3316                                 dp->d_type = DT_UNKNOWN;
3317                                 blksiz += dp->d_reclen;
3318                                 if (blksiz == DIRBLKSIZ)
3319                                         blksiz = 0;
3320                                 uiop->uio_resid -= DIRHDSIZ;
3321                                 uiop->uio_offset += DIRHDSIZ;
3322                                 uiop->uio_iov->iov_base =
3323                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3324                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
3325                                 error = nfsm_mbufuio(nd, uiop, len);
3326                                 if (error)
3327                                         goto nfsmout;
3328                                 cp = uiop->uio_iov->iov_base;
3329                                 tlen -= len;
3330                                 NFSBZERO(cp, tlen);
3331                                 cp += tlen;     /* points to cookie storage */
3332                                 tl2 = (u_int32_t *)cp;
3333                                 uiop->uio_iov->iov_base =
3334                                     (char *)uiop->uio_iov->iov_base + tlen +
3335                                     NFSX_HYPER;
3336                                 uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3337                                 uiop->uio_resid -= tlen + NFSX_HYPER;
3338                                 uiop->uio_offset += (tlen + NFSX_HYPER);
3339                         } else {
3340                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3341                                 if (error)
3342                                         goto nfsmout;
3343                         }
3344                         if (nd->nd_flag & ND_NFSV4) {
3345                                 rderr = 0;
3346                                 nfsva.na_mntonfileno = UINT64_MAX;
3347                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3348                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3349                                     NULL, NULL, &rderr, p, cred);
3350                                 if (error)
3351                                         goto nfsmout;
3352                                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3353                         } else if (nd->nd_flag & ND_NFSV3) {
3354                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3355                                 ncookie.lval[0] = *tl++;
3356                                 ncookie.lval[1] = *tl++;
3357                         } else {
3358                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3359                                 ncookie.lval[0] = 0;
3360                                 ncookie.lval[1] = *tl++;
3361                         }
3362                         if (bigenough) {
3363                             if (nd->nd_flag & ND_NFSV4) {
3364                                 if (rderr) {
3365                                     dp->d_fileno = 0;
3366                                 } else {
3367                                     if (gotmnton) {
3368                                         if (nfsva.na_mntonfileno != UINT64_MAX)
3369                                             dp->d_fileno = nfsva.na_mntonfileno;
3370                                         else
3371                                             dp->d_fileno = nfsva.na_fileid;
3372                                     } else if (nfsva.na_filesid[0] ==
3373                                         dnp->n_vattr.na_filesid[0] &&
3374                                         nfsva.na_filesid[1] ==
3375                                         dnp->n_vattr.na_filesid[1]) {
3376                                         dp->d_fileno = nfsva.na_fileid;
3377                                     } else {
3378                                         do {
3379                                             fakefileno--;
3380                                         } while (fakefileno ==
3381                                             nfsva.na_fileid);
3382                                         dp->d_fileno = fakefileno;
3383                                     }
3384                                     dp->d_type = vtonfs_dtype(nfsva.na_type);
3385                                 }
3386                             } else {
3387                                 dp->d_fileno = nfsva.na_fileid;
3388                             }
3389                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3390                                 ncookie.lval[0];
3391                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3392                                 ncookie.lval[1];
3393                         }
3394                         more_dirs = fxdr_unsigned(int, *tl);
3395                 }
3396                 /*
3397                  * If at end of rpc data, get the eof boolean
3398                  */
3399                 if (!more_dirs) {
3400                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3401                         eof = fxdr_unsigned(int, *tl);
3402                         if (tryformoredirs)
3403                                 more_dirs = !eof;
3404                         if (nd->nd_flag & ND_NFSV4) {
3405                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3406                                     stuff);
3407                                 if (error)
3408                                         goto nfsmout;
3409                         }
3410                 }
3411                 m_freem(nd->nd_mrep);
3412                 nd->nd_mrep = NULL;
3413         }
3414         /*
3415          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3416          * by increasing d_reclen for the last record.
3417          */
3418         if (blksiz > 0) {
3419                 left = DIRBLKSIZ - blksiz;
3420                 NFSBZERO(uiop->uio_iov->iov_base, left);
3421                 dp->d_reclen += left;
3422                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3423                     left;
3424                 uiop->uio_iov->iov_len -= left;
3425                 uiop->uio_resid -= left;
3426                 uiop->uio_offset += left;
3427         }
3428
3429         /*
3430          * If returning no data, assume end of file.
3431          * If not bigenough, return not end of file, since you aren't
3432          *    returning all the data
3433          * Otherwise, return the eof flag from the server.
3434          */
3435         if (eofp) {
3436                 if (tresid == ((size_t)(uiop->uio_resid)))
3437                         *eofp = 1;
3438                 else if (!bigenough)
3439                         *eofp = 0;
3440                 else
3441                         *eofp = eof;
3442         }
3443
3444         /*
3445          * Add extra empty records to any remaining DIRBLKSIZ chunks.
3446          */
3447         while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
3448                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3449                 NFSBZERO(dp, DIRBLKSIZ);
3450                 dp->d_type = DT_UNKNOWN;
3451                 tl = (u_int32_t *)&dp->d_name[4];
3452                 *tl++ = cookie.lval[0];
3453                 *tl = cookie.lval[1];
3454                 dp->d_reclen = DIRBLKSIZ;
3455                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3456                     DIRBLKSIZ;
3457                 uiop->uio_iov->iov_len -= DIRBLKSIZ;
3458                 uiop->uio_resid -= DIRBLKSIZ;
3459                 uiop->uio_offset += DIRBLKSIZ;
3460         }
3461
3462 nfsmout:
3463         if (nd->nd_mrep != NULL)
3464                 m_freem(nd->nd_mrep);
3465         return (error);
3466 }
3467
3468 #ifndef APPLE
3469 /*
3470  * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3471  * (Also used for NFS V4 when mount flag set.)
3472  * (As for nfsrpc_readdir(): one iovec, resid a multiple of DIRBLKSIZ.)
3473  */
3474 int
3475 nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3476     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3477     int *eofp, void *stuff)
3478 {
3479         int len, left;
3480         struct dirent *dp = NULL;
3481         u_int32_t *tl;
3482         vnode_t newvp = NULLVP;
3483         struct nfsrv_descript nfsd, *nd = &nfsd;
3484         struct nameidata nami, *ndp = &nami;
3485         struct componentname *cnp = &ndp->ni_cnd;
3486         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3487         struct nfsnode *dnp = VTONFS(vp), *np;
3488         struct nfsvattr nfsva;
3489         struct nfsfh *nfhp;
3490         nfsquad_t cookie, ncookie;
3491         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3492         int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3493         int isdotdot = 0, unlocknewvp = 0;
3494         u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3495         u_int64_t fileno = 0;
3496         char *cp;
3497         nfsattrbit_t attrbits, dattrbits;
3498         size_t tresid;
3499         u_int32_t *tl2 = NULL, rderr;
3500         struct timespec dctime, ts;
3501         bool attr_ok;
3502
3503         KASSERT(uiop->uio_iovcnt == 1 &&
3504             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3505             ("nfs readdirplusrpc bad uio"));
3506         ncookie.lval[0] = ncookie.lval[1] = 0;
3507         timespecclear(&dctime);
3508         *attrflagp = 0;
3509         if (eofp != NULL)
3510                 *eofp = 0;
3511         ndp->ni_dvp = vp;
3512         nd->nd_mrep = NULL;
3513         cookie.lval[0] = cookiep->nfsuquad[0];
3514         cookie.lval[1] = cookiep->nfsuquad[1];
3515         tresid = uiop->uio_resid;
3516
3517         /*
3518          * For NFSv4, first create the "." and ".." entries.
3519          */
3520         if (NFSHASNFSV4(nmp)) {
3521                 NFSGETATTR_ATTRBIT(&dattrbits);
3522                 NFSZERO_ATTRBIT(&attrbits);
3523                 NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3524                 if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3525                     NFSATTRBIT_MOUNTEDONFILEID)) {
3526                         NFSSETBIT_ATTRBIT(&attrbits,
3527                             NFSATTRBIT_MOUNTEDONFILEID);
3528                         gotmnton = 1;
3529                 } else {
3530                         /*
3531                          * Must fake it. Use the fileno, except when the
3532                          * fsid is != to that of the directory. For that
3533                          * case, generate a fake fileno that is not the same.
3534                          */
3535                         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3536                         gotmnton = 0;
3537                 }
3538
3539                 /*
3540                  * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3541                  */
3542                 if (uiop->uio_offset == 0) {
3543                         NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp, cred);
3544                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3545                         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
3546                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3547                         (void) nfsrv_putattrbit(nd, &attrbits);
3548                         error = nfscl_request(nd, vp, p, cred, stuff);
3549                         if (error)
3550                             return (error);
3551                         dotfileid = 0;  /* Fake out the compiler. */
3552                         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3553                             error = nfsm_loadattr(nd, &nfsva);
3554                             if (error != 0)
3555                                 goto nfsmout;
3556                             dctime = nfsva.na_ctime;
3557                             dotfileid = nfsva.na_fileid;
3558                         }
3559                         if (nd->nd_repstat == 0) {
3560                             NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3561                             len = fxdr_unsigned(int, *(tl + 4));
3562                             if (len > 0 && len <= NFSX_V4FHMAX)
3563                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3564                             else
3565                                 error = EPERM;
3566                             if (!error) {
3567                                 NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3568                                 nfsva.na_mntonfileno = UINT64_MAX;
3569                                 error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3570                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3571                                     NULL, NULL, NULL, p, cred);
3572                                 if (error) {
3573                                     dotdotfileid = dotfileid;
3574                                 } else if (gotmnton) {
3575                                     if (nfsva.na_mntonfileno != UINT64_MAX)
3576                                         dotdotfileid = nfsva.na_mntonfileno;
3577                                     else
3578                                         dotdotfileid = nfsva.na_fileid;
3579                                 } else if (nfsva.na_filesid[0] ==
3580                                     dnp->n_vattr.na_filesid[0] &&
3581                                     nfsva.na_filesid[1] ==
3582                                     dnp->n_vattr.na_filesid[1]) {
3583                                     dotdotfileid = nfsva.na_fileid;
3584                                 } else {
3585                                     do {
3586                                         fakefileno--;
3587                                     } while (fakefileno ==
3588                                         nfsva.na_fileid);
3589                                     dotdotfileid = fakefileno;
3590                                 }
3591                             }
3592                         } else if (nd->nd_repstat == NFSERR_NOENT) {
3593                             /*
3594                              * Lookupp returns NFSERR_NOENT when we are
3595                              * at the root, so just use the current dir.
3596                              */
3597                             nd->nd_repstat = 0;
3598                             dotdotfileid = dotfileid;
3599                         } else {
3600                             error = nd->nd_repstat;
3601                         }
3602                         m_freem(nd->nd_mrep);
3603                         if (error)
3604                             return (error);
3605                         nd->nd_mrep = NULL;
3606                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3607                         dp->d_pad0 = dp->d_pad1 = 0;
3608                         dp->d_off = 0;
3609                         dp->d_type = DT_DIR;
3610                         dp->d_fileno = dotfileid;
3611                         dp->d_namlen = 1;
3612                         *((uint64_t *)dp->d_name) = 0;  /* Zero pad it. */
3613                         dp->d_name[0] = '.';
3614                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3615                         /*
3616                          * Just make these offset cookie 0.
3617                          */
3618                         tl = (u_int32_t *)&dp->d_name[8];
3619                         *tl++ = 0;
3620                         *tl = 0;
3621                         blksiz += dp->d_reclen;
3622                         uiop->uio_resid -= dp->d_reclen;
3623                         uiop->uio_offset += dp->d_reclen;
3624                         uiop->uio_iov->iov_base =
3625                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3626                         uiop->uio_iov->iov_len -= dp->d_reclen;
3627                         dp = (struct dirent *)uiop->uio_iov->iov_base;
3628                         dp->d_pad0 = dp->d_pad1 = 0;
3629                         dp->d_off = 0;
3630                         dp->d_type = DT_DIR;
3631                         dp->d_fileno = dotdotfileid;
3632                         dp->d_namlen = 2;
3633                         *((uint64_t *)dp->d_name) = 0;
3634                         dp->d_name[0] = '.';
3635                         dp->d_name[1] = '.';
3636                         dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3637                         /*
3638                          * Just make these offset cookie 0.
3639                          */
3640                         tl = (u_int32_t *)&dp->d_name[8];
3641                         *tl++ = 0;
3642                         *tl = 0;
3643                         blksiz += dp->d_reclen;
3644                         uiop->uio_resid -= dp->d_reclen;
3645                         uiop->uio_offset += dp->d_reclen;
3646                         uiop->uio_iov->iov_base =
3647                             (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3648                         uiop->uio_iov->iov_len -= dp->d_reclen;
3649                 }
3650                 NFSREADDIRPLUS_ATTRBIT(&attrbits);
3651                 if (gotmnton)
3652                         NFSSETBIT_ATTRBIT(&attrbits,
3653                             NFSATTRBIT_MOUNTEDONFILEID);
3654                 if (!NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3655                     NFSATTRBIT_TIMECREATE))
3656                         NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE);
3657         }
3658
3659         /*
3660          * Loop around doing readdir rpc's of size nm_readdirsize.
3661          * The stopping criteria is EOF or buffer full.
3662          */
3663         while (more_dirs && bigenough) {
3664                 *attrflagp = 0;
3665                 NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp, cred);
3666                 NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3667                 *tl++ = cookie.lval[0];
3668                 *tl++ = cookie.lval[1];
3669                 if (cookie.qval == 0) {
3670                         *tl++ = 0;
3671                         *tl++ = 0;
3672                 } else {
3673                         NFSLOCKNODE(dnp);
3674                         *tl++ = dnp->n_cookieverf.nfsuquad[0];
3675                         *tl++ = dnp->n_cookieverf.nfsuquad[1];
3676                         NFSUNLOCKNODE(dnp);
3677                 }
3678                 *tl++ = txdr_unsigned(nmp->nm_readdirsize);
3679                 *tl = txdr_unsigned(nmp->nm_readdirsize);
3680                 if (nd->nd_flag & ND_NFSV4) {
3681                         (void) nfsrv_putattrbit(nd, &attrbits);
3682                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3683                         *tl = txdr_unsigned(NFSV4OP_GETATTR);
3684                         (void) nfsrv_putattrbit(nd, &dattrbits);
3685                 }
3686                 nanouptime(&ts);
3687                 error = nfscl_request(nd, vp, p, cred, stuff);
3688                 if (error)
3689                         return (error);
3690                 if (nd->nd_flag & ND_NFSV3)
3691                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3692                 if (nd->nd_repstat || error) {
3693                         if (!error)
3694                                 error = nd->nd_repstat;
3695                         goto nfsmout;
3696                 }
3697                 if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3698                         dctime = nap->na_ctime;
3699                 NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3700                 NFSLOCKNODE(dnp);
3701                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
3702                 dnp->n_cookieverf.nfsuquad[1] = *tl++;
3703                 NFSUNLOCKNODE(dnp);
3704                 more_dirs = fxdr_unsigned(int, *tl);
3705                 if (!more_dirs)
3706                         tryformoredirs = 0;
3707
3708                 /* loop through the dir entries, doctoring them to 4bsd form */
3709                 while (more_dirs && bigenough) {
3710                         NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3711                         if (nd->nd_flag & ND_NFSV4) {
3712                                 ncookie.lval[0] = *tl++;
3713                                 ncookie.lval[1] = *tl++;
3714                         } else {
3715                                 fileno = fxdr_hyper(tl);
3716                                 tl += 2;
3717                         }
3718                         len = fxdr_unsigned(int, *tl);
3719                         if (len <= 0 || len > NFS_MAXNAMLEN) {
3720                                 error = EBADRPC;
3721                                 goto nfsmout;
3722                         }
3723                         tlen = roundup2(len, 8);
3724                         if (tlen == len)
3725                                 tlen += 8;  /* To ensure null termination. */
3726                         left = DIRBLKSIZ - blksiz;
3727                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
3728                                 NFSBZERO(uiop->uio_iov->iov_base, left);
3729                                 dp->d_reclen += left;
3730                                 uiop->uio_iov->iov_base =
3731                                     (char *)uiop->uio_iov->iov_base + left;
3732                                 uiop->uio_iov->iov_len -= left;
3733                                 uiop->uio_resid -= left;
3734                                 uiop->uio_offset += left;
3735                                 blksiz = 0;
3736                         }
3737                         if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
3738                             uiop->uio_resid)
3739                                 bigenough = 0;
3740                         if (bigenough) {
3741                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3742                                 dp->d_pad0 = dp->d_pad1 = 0;
3743                                 dp->d_off = 0;
3744                                 dp->d_namlen = len;
3745                                 dp->d_reclen = _GENERIC_DIRLEN(len) +
3746                                     NFSX_HYPER;
3747                                 dp->d_type = DT_UNKNOWN;
3748                                 blksiz += dp->d_reclen;
3749                                 if (blksiz == DIRBLKSIZ)
3750                                         blksiz = 0;
3751                                 uiop->uio_resid -= DIRHDSIZ;
3752                                 uiop->uio_offset += DIRHDSIZ;
3753                                 uiop->uio_iov->iov_base =
3754                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
3755                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
3756                                 cnp->cn_nameptr = uiop->uio_iov->iov_base;
3757                                 cnp->cn_namelen = len;
3758                                 NFSCNHASHZERO(cnp);
3759                                 error = nfsm_mbufuio(nd, uiop, len);
3760                                 if (error)
3761                                         goto nfsmout;
3762                                 cp = uiop->uio_iov->iov_base;
3763                                 tlen -= len;
3764                                 NFSBZERO(cp, tlen);
3765                                 cp += tlen;     /* points to cookie storage */
3766                                 tl2 = (u_int32_t *)cp;
3767                                 if (len == 2 && cnp->cn_nameptr[0] == '.' &&
3768                                     cnp->cn_nameptr[1] == '.')
3769                                         isdotdot = 1;
3770                                 else
3771                                         isdotdot = 0;
3772                                 uiop->uio_iov->iov_base =
3773                                     (char *)uiop->uio_iov->iov_base + tlen +
3774                                     NFSX_HYPER;
3775                                 uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
3776                                 uiop->uio_resid -= tlen + NFSX_HYPER;
3777                                 uiop->uio_offset += (tlen + NFSX_HYPER);
3778                         } else {
3779                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3780                                 if (error)
3781                                         goto nfsmout;
3782                         }
3783                         nfhp = NULL;
3784                         if (nd->nd_flag & ND_NFSV3) {
3785                                 NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3786                                 ncookie.lval[0] = *tl++;
3787                                 ncookie.lval[1] = *tl++;
3788                                 attrflag = fxdr_unsigned(int, *tl);
3789                                 if (attrflag) {
3790                                   error = nfsm_loadattr(nd, &nfsva);
3791                                   if (error)
3792                                         goto nfsmout;
3793                                 }
3794                                 NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
3795                                 if (*tl) {
3796                                         error = nfsm_getfh(nd, &nfhp);
3797                                         if (error)
3798                                             goto nfsmout;
3799                                 }
3800                                 if (!attrflag && nfhp != NULL) {
3801                                         free(nfhp, M_NFSFH);
3802                                         nfhp = NULL;
3803                                 }
3804                         } else {
3805                                 rderr = 0;
3806                                 nfsva.na_mntonfileno = 0xffffffff;
3807                                 error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
3808                                     NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3809                                     NULL, NULL, &rderr, p, cred);
3810                                 if (error)
3811                                         goto nfsmout;
3812                         }
3813
3814                         if (bigenough) {
3815                             if (nd->nd_flag & ND_NFSV4) {
3816                                 if (rderr) {
3817                                     dp->d_fileno = 0;
3818                                 } else if (gotmnton) {
3819                                     if (nfsva.na_mntonfileno != 0xffffffff)
3820                                         dp->d_fileno = nfsva.na_mntonfileno;
3821                                     else
3822                                         dp->d_fileno = nfsva.na_fileid;
3823                                 } else if (nfsva.na_filesid[0] ==
3824                                     dnp->n_vattr.na_filesid[0] &&
3825                                     nfsva.na_filesid[1] ==
3826                                     dnp->n_vattr.na_filesid[1]) {
3827                                     dp->d_fileno = nfsva.na_fileid;
3828                                 } else {
3829                                     do {
3830                                         fakefileno--;
3831                                     } while (fakefileno ==
3832                                         nfsva.na_fileid);
3833                                     dp->d_fileno = fakefileno;
3834                                 }
3835                             } else {
3836                                 dp->d_fileno = fileno;
3837                             }
3838                             *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3839                                 ncookie.lval[0];
3840                             *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3841                                 ncookie.lval[1];
3842
3843                             if (nfhp != NULL) {
3844                                 attr_ok = true;
3845                                 if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
3846                                     dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
3847                                     VREF(vp);
3848                                     newvp = vp;
3849                                     unlocknewvp = 0;
3850                                     free(nfhp, M_NFSFH);
3851                                     np = dnp;
3852                                 } else if (isdotdot != 0) {
3853                                     /*
3854                                      * Skip doing a nfscl_nget() call for "..".
3855                                      * There's a race between acquiring the nfs
3856                                      * node here and lookups that look for the
3857                                      * directory being read (in the parent).
3858                                      * It would try to get a lock on ".." here,
3859                                      * owning the lock on the directory being
3860                                      * read. Lookup will hold the lock on ".."
3861                                      * and try to acquire the lock on the
3862                                      * directory being read.
3863                                      * If the directory is unlocked/relocked,
3864                                      * then there is a LOR with the buflock
3865                                      * vp is relocked.
3866                                      */
3867                                     free(nfhp, M_NFSFH);
3868                                 } else {
3869                                     error = nfscl_nget(vp->v_mount, vp,
3870                                       nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE);
3871                                     if (!error) {
3872                                         newvp = NFSTOV(np);
3873                                         unlocknewvp = 1;
3874                                         /*
3875                                          * If n_localmodtime >= time before RPC,
3876                                          * then a file modification operation,
3877                                          * such as VOP_SETATTR() of size, has
3878                                          * occurred while the Lookup RPC and
3879                                          * acquisition of the vnode happened. As
3880                                          * such, the attributes might be stale,
3881                                          * with possibly an incorrect size.
3882                                          */
3883                                         NFSLOCKNODE(np);
3884                                         if (timespecisset(
3885                                             &np->n_localmodtime) &&
3886                                             timespeccmp(&np->n_localmodtime,
3887                                             &ts, >=)) {
3888                                             NFSCL_DEBUG(4, "nfsrpc_readdirplus:"
3889                                                 " localmod stale attributes\n");
3890                                             attr_ok = false;
3891                                         }
3892                                         NFSUNLOCKNODE(np);
3893                                     }
3894                                 }
3895                                 nfhp = NULL;
3896                                 if (newvp != NULLVP) {
3897                                     if (attr_ok)
3898                                         error = nfscl_loadattrcache(&newvp,
3899                                             &nfsva, NULL, NULL, 0, 0);
3900                                     if (error) {
3901                                         if (unlocknewvp)
3902                                             vput(newvp);
3903                                         else
3904                                             vrele(newvp);
3905                                         goto nfsmout;
3906                                     }
3907                                     dp->d_type =
3908                                         vtonfs_dtype(np->n_vattr.na_type);
3909                                     ndp->ni_vp = newvp;
3910                                     NFSCNHASH(cnp, HASHINIT);
3911                                     if (cnp->cn_namelen <= NCHNAMLEN &&
3912                                         ndp->ni_dvp != ndp->ni_vp &&
3913                                         (newvp->v_type != VDIR ||
3914                                          dctime.tv_sec != 0)) {
3915                                         cache_enter_time_flags(ndp->ni_dvp,
3916                                             ndp->ni_vp, cnp,
3917                                             &nfsva.na_ctime,
3918                                             newvp->v_type != VDIR ? NULL :
3919                                             &dctime, VFS_CACHE_DROPOLD);
3920                                     }
3921                                     if (unlocknewvp)
3922                                         vput(newvp);
3923                                     else
3924                                         vrele(newvp);
3925                                     newvp = NULLVP;
3926                                 }
3927                             }
3928                         } else if (nfhp != NULL) {
3929                             free(nfhp, M_NFSFH);
3930                         }
3931                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3932                         more_dirs = fxdr_unsigned(int, *tl);
3933                 }
3934                 /*
3935                  * If at end of rpc data, get the eof boolean
3936                  */
3937                 if (!more_dirs) {
3938                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3939                         eof = fxdr_unsigned(int, *tl);
3940                         if (tryformoredirs)
3941                                 more_dirs = !eof;
3942                         if (nd->nd_flag & ND_NFSV4) {
3943                                 error = nfscl_postop_attr(nd, nap, attrflagp,
3944                                     stuff);
3945                                 if (error)
3946                                         goto nfsmout;
3947                         }
3948                 }
3949                 m_freem(nd->nd_mrep);
3950                 nd->nd_mrep = NULL;
3951         }
3952         /*
3953          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3954          * by increasing d_reclen for the last record.
3955          */
3956         if (blksiz > 0) {
3957                 left = DIRBLKSIZ - blksiz;
3958                 NFSBZERO(uiop->uio_iov->iov_base, left);
3959                 dp->d_reclen += left;
3960                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3961                     left;
3962                 uiop->uio_iov->iov_len -= left;
3963                 uiop->uio_resid -= left;
3964                 uiop->uio_offset += left;
3965         }
3966
3967         /*
3968          * If returning no data, assume end of file.
3969          * If not bigenough, return not end of file, since you aren't
3970          *    returning all the data
3971          * Otherwise, return the eof flag from the server.
3972          */
3973         if (eofp != NULL) {
3974                 if (tresid == uiop->uio_resid)
3975                         *eofp = 1;
3976                 else if (!bigenough)
3977                         *eofp = 0;
3978                 else
3979                         *eofp = eof;
3980         }
3981
3982         /*
3983          * Add extra empty records to any remaining DIRBLKSIZ chunks.
3984          */
3985         while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
3986                 dp = (struct dirent *)uiop->uio_iov->iov_base;
3987                 NFSBZERO(dp, DIRBLKSIZ);
3988                 dp->d_type = DT_UNKNOWN;
3989                 tl = (u_int32_t *)&dp->d_name[4];
3990                 *tl++ = cookie.lval[0];
3991                 *tl = cookie.lval[1];
3992                 dp->d_reclen = DIRBLKSIZ;
3993                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
3994                     DIRBLKSIZ;
3995                 uiop->uio_iov->iov_len -= DIRBLKSIZ;
3996                 uiop->uio_resid -= DIRBLKSIZ;
3997                 uiop->uio_offset += DIRBLKSIZ;
3998         }
3999
4000 nfsmout:
4001         if (nd->nd_mrep != NULL)
4002                 m_freem(nd->nd_mrep);
4003         return (error);
4004 }
4005 #endif  /* !APPLE */
4006
4007 /*
4008  * Nfs commit rpc
      *
      * Builds a Commit request for vp carrying the 64-bit offset and the
      * 32-bit cnt and, for NFSv4, appends a Getattr operation.  *attrflagp is
      * cleared before the request is built.  When the reply parses without
      * error and nd_repstat is zero, the write verifier in the reply is
      * compared with nmp->nm_verf while the mount is locked; on a mismatch
      * the stored verifier is replaced and nd_repstat is set to
      * NFSERR_STALEWRITEVERF.
      *
      * Returns the nfscl_request() error directly when the request fails,
      * otherwise the reply-parsing error or, when that is zero, nd_repstat.
4009  */
4010 int
4011 nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
4012     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
4013 {
4014         u_int32_t *tl;
4015         struct nfsrv_descript nfsd, *nd = &nfsd;
4016         nfsattrbit_t attrbits;
4017         int error;
4018         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4019
4020         *attrflagp = 0;
4021         NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp, cred);
             /* Arguments: offset as a hyper (two words) followed by cnt. */
4022         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
4023         txdr_hyper(offset, tl);
4024         tl += 2;
4025         *tl = txdr_unsigned(cnt);
4026         if (nd->nd_flag & ND_NFSV4) {
4027                 /*
4028                  * And do a Getattr op.
4029                  */
4030                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4031                 *tl = txdr_unsigned(NFSV4OP_GETATTR);
4032                 NFSGETATTR_ATTRBIT(&attrbits);
4033                 (void) nfsrv_putattrbit(nd, &attrbits);
4034         }
4035         error = nfscl_request(nd, vp, p, cred, stuff);
             /* A failed request returns here without the m_freem() below. */
4036         if (error)
4037                 return (error);
4038         error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, NULL);
4039         if (!error && !nd->nd_repstat) {
                     /*
                      * NOTE(review): no goto targets nfsmout in this function;
                      * presumably NFSM_DISSECT jumps there when the reply is
                      * too short -- confirm against the macro definition.
                      */
4040                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
4041                 NFSLOCKMNT(nmp);
                     /* A changed verifier is recorded and reported as stale. */
4042                 if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
4043                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
4044                         nd->nd_repstat = NFSERR_STALEWRITEVERF;
4045                 }
4046                 NFSUNLOCKMNT(nmp);
4047                 if (nd->nd_flag & ND_NFSV4)
4048                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4049         }
4050 nfsmout:
             /* A parsing error takes precedence over the server's status. */
4051         if (!error && nd->nd_repstat)
4052                 error = nd->nd_repstat;
4053         m_freem(nd->nd_mrep);
4054         return (error);
4055 }
4056
4057 /*
4058  * NFS byte range lock rpc.
4059  * (Mostly just calls one of the three lower level RPC routines.)
      *
      * fl is converted to a starting offset (size is added for SEEK_END) and
      * a length, with an l_len of zero sent as NFS64BITSSET.  op then selects
      * the path: F_GETLK uses nfsrpc_lockt(), F_UNLCK with an l_type of
      * F_UNLCK uses nfsrpc_locku() and F_SETLK uses nfsrpc_lock(); any other
      * op yields EINVAL.
      *
      * The operation is repeated, after an nfs_catnap(), while it fails with
      * one of the grace/stale/delay/bad-session errors tested at the bottom
      * of the outer loop.  NFSERR_EXPIRED and NFSERR_BADSTATEID are retried
      * only while clidrev is non-zero, nfscl_hasexpired() returns zero and
      * fewer than four such retries have been made; once four have been made
      * a remaining error is returned as EIO.
      *
      * Returns EINVAL for an unknown l_whence, a negative start or a range
      * whose end is below its start; otherwise 0 or the error from the
      * chosen path.
4060  */
4061 int
4062 nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
4063     int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4064 {
4065         struct nfscllockowner *lp;
4066         struct nfsclclient *clp;
4067         struct nfsfh *nfhp;
4068         struct nfsrv_descript nfsd, *nd = &nfsd;
4069         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4070         u_int64_t off, len;
4071         off_t start, end;
4072         u_int32_t clidrev = 0;
4073         int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
4074         int callcnt, dorpc;
4075
4076         /*
4077          * Convert the flock structure into a start and end and do POSIX
4078          * bounds checking.
              * start is the signed value used for the checks; off is the
              * unsigned copy handed to the lower level routines.
4079          */
4080         switch (fl->l_whence) {
4081         case SEEK_SET:
4082         case SEEK_CUR:
4083                 /*
4084                  * Caller is responsible for adding any necessary offset
4085                  * when SEEK_CUR is used.
4086                  */
4087                 start = fl->l_start;
4088                 off = fl->l_start;
4089                 break;
4090         case SEEK_END:
4091                 start = size + fl->l_start;
4092                 off = size + fl->l_start;
4093                 break;
4094         default:
4095                 return (EINVAL);
4096         }
4097         if (start < 0)
4098                 return (EINVAL);
             /* end is computed only to reject a range that ends before start. */
4099         if (fl->l_len != 0) {
4100                 end = start + fl->l_len - 1;
4101                 if (end < start)
4102                         return (EINVAL);
4103         }
4104
             /* A zero l_len is passed down as NFS64BITSSET. */
4105         len = fl->l_len;
4106         if (len == 0)
4107                 len = NFS64BITSSET;
4108         retrycnt = 0;
4109         do {
                 /* Clear the status left by any previous pass of the loop. */
4110             nd->nd_repstat = 0;
4111             if (op == F_GETLK) {
4112                 error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
4113                 if (error)
4114                         return (error);
                     /*
                      * The LockT RPC is done only when nfscl_lockt() returns
                      * zero.  A return of -1 is mapped to success with no RPC.
                      * NOTE(review): presumably -1 means the test was answered
                      * from local state -- confirm in nfscl_lockt().
                      */
4115                 error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
4116                 if (!error) {
4117                         clidrev = clp->nfsc_clientidrev;
4118                         error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
4119                             p, id, flags);
4120                 } else if (error == -1) {
4121                         error = 0;
4122                 }
4123                 nfscl_clientrelease(clp);
4124             } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
4125                 /*
4126                  * We must loop around for all lockowner cases.
4127                  */
4128                 callcnt = 0;
4129                 error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
4130                 if (error)
4131                         return (error);
4132                 do {
4133                     error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
4134                         clp, id, flags, &lp, &dorpc);
4135                     /*
4136                      * If it returns a NULL lp, we're done.
                          * On the first pass only the client is released; on
                          * later passes nfscl_releasealllocks() is called.
4137                      */
4138                     if (lp == NULL) {
4139                         if (callcnt == 0)
4140                             nfscl_clientrelease(clp);
4141                         else
4142                             nfscl_releasealllocks(clp, vp, p, id, flags);
4143                         return (error);
4144                     }
4145                     if (nmp->nm_clp != NULL)
4146                         clidrev = nmp->nm_clp->nfsc_clientidrev;
4147                     else
4148                         clidrev = 0;
4149                     /*
4150                      * If the server doesn't support Posix lock semantics,
4151                      * only allow locks on the entire file, since it won't
4152                      * handle overlapping byte ranges.
4153                      * There might still be a problem when a lock
4154                      * upgrade/downgrade (read<->write) occurs, since the
4155                      * server "might" expect an unlock first?
4156                      */
4157                     if (dorpc && (lp->nfsl_open->nfso_posixlock ||
4158                         (off == 0 && len == NFS64BITSSET))) {
4159                         /*
4160                          * Since the lock records will go away, we must
4161                          * wait for grace and delay here.
                              * The LockU is repeated, with an nfs_catnap()
                              * between tries, while the RPC itself succeeds
                              * but the reply status is GRACE or DELAY.
4162                          */
4163                         do {
4164                             error = nfsrpc_locku(nd, nmp, lp, off, len,
4165                                 NFSV4LOCKT_READ, cred, p, 0);
4166                             if ((nd->nd_repstat == NFSERR_GRACE ||
4167                                  nd->nd_repstat == NFSERR_DELAY) &&
4168                                 error == 0)
4169                                 (void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4170                                     "nfs_advlock");
4171                         } while ((nd->nd_repstat == NFSERR_GRACE ||
4172                             nd->nd_repstat == NFSERR_DELAY) && error == 0);
4173                     }
4174                     callcnt++;
4175                 } while (error == 0 && nd->nd_repstat == 0);
4176                 nfscl_releasealllocks(clp, vp, p, id, flags);
4177             } else if (op == F_SETLK) {
                     /*
                      * Return at once on an error or when nfscl_getbytelock()
                      * sets donelocally; no Lock RPC is sent in either case.
                      */
4178                 error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
4179                     NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
4180                 if (error || donelocally) {
4181                         return (error);
4182                 }
4183                 if (nmp->nm_clp != NULL)
4184                         clidrev = nmp->nm_clp->nfsc_clientidrev;
4185                 else
4186                         clidrev = 0;
4187                 nfhp = VTONFS(vp)->n_fhp;
                     /*
                      * Without nfso_posixlock only a lock covering offset 0
                      * with a length of NFS64BITSSET is accepted.
                      */
4188                 if (!lp->nfsl_open->nfso_posixlock &&
4189                     (off != 0 || len != NFS64BITSSET)) {
4190                         error = EINVAL;
4191                 } else {
4192                         error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
4193                             nfhp->nfh_len, lp, newone, reclaim, off,
4194                             len, fl->l_type, cred, p, 0);
4195                 }
                     /* Fold in the reply status before releasing the lock. */
4196                 if (!error)
4197                         error = nd->nd_repstat;
4198                 nfscl_lockrelease(lp, error, newone);
4199             } else {
4200                 error = EINVAL;
4201             }
4202             if (!error)
4203                 error = nd->nd_repstat;
                 /*
                  * Sleep before retrying the errors in the first group.  For
                  * EXPIRED/BADSTATEID with a non-zero clidrev, record what
                  * nfscl_hasexpired() returns and count the retry instead.
                  */
4204             if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
4205                 error == NFSERR_STALEDONTRECOVER ||
4206                 error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4207                 error == NFSERR_BADSESSION) {
4208                 (void) nfs_catnap(PZERO, error, "nfs_advlock");
4209             } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
4210                 && clidrev != 0) {
4211                 expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
4212                 retrycnt++;
4213             }
4214         } while (error == NFSERR_GRACE ||
4215             error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4216             error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
4217             error == NFSERR_BADSESSION ||
4218             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
4219              expireret == 0 && clidrev != 0 && retrycnt < 4));
             /* Any error left after four expiry retries is reported as EIO. */
4220         if (error && retrycnt >= 4)
4221                 error = EIO;
4222         return (error);
4223 }
4224
4225 /*
4226  * The lower level routine for the LockT case.
      *
      * Sends a LockT for the range off/len on vp, testing for a read lock
      * when fl->l_type is F_RDLCK and for a write lock otherwise.  The owner
      * string sent is the name filled in by nfscl_filllockowner() followed
      * by the bytes of the file handle of vp.
      *
      * When nd_repstat is zero, fl->l_type is set to F_UNLCK.  When it is
      * NFSERR_DENIED, nd_repstat is cleared and fl is overwritten with the
      * conflicting range and type from the reply (l_whence = SEEK_SET,
      * l_pid = 0).  NFSERR_STALECLIENTID starts recovery for clp.  Any other
      * nd_repstat is left in nd for the caller to examine.
      *
      * Returns the nfscl_request() error, EBADRPC for an out-of-range owner
      * length, the nfsm_advance() result, or 0.
4227  */
4228 int
4229 nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
4230     struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
4231     struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4232 {
4233         u_int32_t *tl;
4234         int error, type, size;
4235         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4236         struct nfsnode *np;
4237         struct nfsmount *nmp;
4238         struct nfsclsession *tsep;
4239
4240         nmp = VFSTONFS(vp->v_mount);
4241         NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp, cred);
             /*
              * Seven words: lock type, offset (2), length (2) and the two
              * words of the session's client id.
              */
4242         NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4243         if (fl->l_type == F_RDLCK)
4244                 *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4245         else
4246                 *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4247         txdr_hyper(off, tl);
4248         tl += 2;
4249         txdr_hyper(len, tl);
4250         tl += 2;
4251         tsep = nfsmnt_mdssession(nmp);
4252         *tl++ = tsep->nfsess_clientid.lval[0];
4253         *tl = tsep->nfsess_clientid.lval[1];
             /* Owner string: lock owner name followed by the file handle. */
4254         nfscl_filllockowner(id, own, flags);
4255         np = VTONFS(vp);
4256         NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
4257             np->n_fhp->nfh_len);
4258         (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
4259         error = nfscl_request(nd, vp, p, cred, NULL);
             /* A failed request returns here without the m_freem() below. */
4260         if (error)
4261                 return (error);
4262         if (nd->nd_repstat == 0) {
4263                 fl->l_type = F_UNLCK;
4264         } else if (nd->nd_repstat == NFSERR_DENIED) {
4265                 nd->nd_repstat = 0;
4266                 fl->l_whence = SEEK_SET;
                     /*
                      * Eight words are taken from the reply: offset (2),
                      * length (2), lock type (1), two words that are skipped
                      * and the length of the owner string (1).
                      * NOTE(review): the skipped words are presumably the
                      * conflicting owner's client id -- confirm.
                      */
4267                 NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4268                 fl->l_start = fxdr_hyper(tl);
4269                 tl += 2;
4270                 len = fxdr_hyper(tl);
4271                 tl += 2;
                     /* NFS64BITSSET maps back to an l_len of zero. */
4272                 if (len == NFS64BITSSET)
4273                         fl->l_len = 0;
4274                 else
4275                         fl->l_len = len;
4276                 type = fxdr_unsigned(int, *tl++);
4277                 if (type == NFSV4LOCKT_WRITE)
4278                         fl->l_type = F_WRLCK;
4279                 else
4280                         fl->l_type = F_RDLCK;
4281                 /*
4282                  * XXX For now, I have no idea what to do with the
4283                  * conflicting lock_owner, so I'll just set the pid == 0
4284                  * and skip over the lock_owner.
4285                  */
4286                 fl->l_pid = (pid_t)0;
4287                 tl += 2;
4288                 size = fxdr_unsigned(int, *tl);
                     /* Reject an owner length outside 0..NFSV4_OPAQUELIMIT. */
4289                 if (size < 0 || size > NFSV4_OPAQUELIMIT)
4290                         error = EBADRPC;
4291                 if (!error)
4292                         error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4293         } else if (nd->nd_repstat == NFSERR_STALECLIENTID)
4294                 nfscl_initiate_recovery(clp);
4295 nfsmout:
4296         m_freem(nd->nd_mrep);
4297         return (error);
4298 }
4299
4300 /*
4301  * Lower level function that performs the LockU RPC.
      *
      * The request is built against the file handle of the open that lp
      * belongs to and carries the lock type, lp's sequence number, lp's
      * stateid and the off/len range.  When syscred is non-zero,
      * ND_USEGSSNAME is set in nd_flag before the request is sent.
      *
      * NFSCL_INCRSEQID() is applied to lp->nfsl_seqid after the request
      * whether or not it failed.  When nd_repstat is zero the stateid in the
      * reply replaces lp->nfsl_stateid; NFSERR_STALESTATEID starts recovery
      * for the owning client.  Other nd_repstat values are left in nd.
      *
      * Returns the newnfs_request() error or 0.
      * NOTE(review): error can presumably also be set by NFSM_DISSECT on a
      * short reply -- confirm against the macro definition.
4302  */
4303 static int
4304 nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
4305     struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
4306     u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
4307 {
4308         u_int32_t *tl;
4309         int error;
4310
4311         nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
4312             lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0, cred);
             /* Type, seqid, stateid, offset (2 words) and length (2 words). */
4313         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
4314         *tl++ = txdr_unsigned(type);
4315         *tl = txdr_unsigned(lp->nfsl_seqid);
             /*
              * When nfstest_outofseq is non-zero, a random subset of requests
              * is sent with seqid + 1 instead.
              */
4316         if (nfstest_outofseq &&
4317             (arc4random() % nfstest_outofseq) == 0)
4318                 *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4319         tl++;
             /* The stateid's seqid word is sent as zero for NFSHASNFSV4N. */
4320         if (NFSHASNFSV4N(nmp))
4321                 *tl++ = 0;
4322         else
4323                 *tl++ = lp->nfsl_stateid.seqid;
4324         *tl++ = lp->nfsl_stateid.other[0];
4325         *tl++ = lp->nfsl_stateid.other[1];
4326         *tl++ = lp->nfsl_stateid.other[2];
4327         txdr_hyper(off, tl);
4328         tl += 2;
4329         txdr_hyper(len, tl);
4330         if (syscred)
4331                 nd->nd_flag |= ND_USEGSSNAME;
4332         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4333             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
             /* The seqid update is done before the error check. */
4334         NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4335         if (error)
4336                 return (error);
4337         if (nd->nd_repstat == 0) {
                     /* Store the stateid returned by the server. */
4338                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4339                 lp->nfsl_stateid.seqid = *tl++;
4340                 lp->nfsl_stateid.other[0] = *tl++;
4341                 lp->nfsl_stateid.other[1] = *tl++;
4342                 lp->nfsl_stateid.other[2] = *tl;
4343         } else if (nd->nd_repstat == NFSERR_STALESTATEID)
4344                 nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4345 nfsmout:
4346         m_freem(nd->nd_mrep);
4347         return (error);
4348 }
4349
4350 /*
4351  * The actual Lock RPC.
      *
      * Requests a read lock when type is F_RDLCK and a write lock otherwise,
      * for the range off/len on the file handle nfhp/fhlen.  When newone is
      * non-zero the request carries the open owner's seqid, the open stateid,
      * lp's seqid, the session's client id and an owner string made of
      * lp->nfsl_owner followed by the file handle.  Otherwise it carries lp's
      * own stateid and seqid.  When syscred is non-zero, ND_USEGSSNAME is set
      * in nd_flag before the request is sent.
      *
      * If newnfs_request() fails its error is returned before any seqid is
      * updated.  Otherwise NFSCL_INCRSEQID() is applied to the open owner's
      * seqid (newone only) and to lp's seqid.  When nd_repstat is zero the
      * stateid in the reply replaces lp->nfsl_stateid; on NFSERR_DENIED the
      * denial data in the reply is skipped; NFSERR_STALESTATEID starts
      * recovery for the owning client.  nd_repstat is left in nd.
      *
      * Returns the newnfs_request() error, EBADRPC for an out-of-range owner
      * length in a denial, the nfsm_advance() result, or 0.
4352  */
4353 int
4354 nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
4355     u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
4356     int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
4357     NFSPROC_T *p, int syscred)
4358 {
4359         u_int32_t *tl;
4360         int error, size;
4361         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4362         struct nfsclsession *tsep;
4363
4364         nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
4365             cred);
             /*
              * Seven words: lock type, reclaim, offset (2), length (2) and the
              * true/false word that selects which form follows.
              */
4366         NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4367         if (type == F_RDLCK)
4368                 *tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4369         else
4370                 *tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4371         *tl++ = txdr_unsigned(reclaim);
4372         txdr_hyper(off, tl);
4373         tl += 2;
4374         txdr_hyper(len, tl);
4375         tl += 2;
4376         if (newone) {
                 /*
                  * newone form: open owner seqid, open stateid, lock seqid,
                  * client id, then the owner string.
                  */
4377             *tl = newnfs_true;
4378             NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
4379                 2 * NFSX_UNSIGNED + NFSX_HYPER);
4380             *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
4381             if (NFSHASNFSV4N(nmp))
4382                 *tl++ = 0;
4383             else
4384                 *tl++ = lp->nfsl_open->nfso_stateid.seqid;
4385             *tl++ = lp->nfsl_open->nfso_stateid.other[0];
4386             *tl++ = lp->nfsl_open->nfso_stateid.other[1];
4387             *tl++ = lp->nfsl_open->nfso_stateid.other[2];
4388             *tl++ = txdr_unsigned(lp->nfsl_seqid);
4389             tsep = nfsmnt_mdssession(nmp);
4390             *tl++ = tsep->nfsess_clientid.lval[0];
4391             *tl = tsep->nfsess_clientid.lval[1];
4392             NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4393             NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4394             (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4395         } else {
                 /* Other form: lp's stateid followed by lp's seqid. */
4396             *tl = newnfs_false;
4397             NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
4398             if (NFSHASNFSV4N(nmp))
4399                 *tl++ = 0;
4400             else
4401                 *tl++ = lp->nfsl_stateid.seqid;
4402             *tl++ = lp->nfsl_stateid.other[0];
4403             *tl++ = lp->nfsl_stateid.other[1];
4404             *tl++ = lp->nfsl_stateid.other[2];
4405             *tl = txdr_unsigned(lp->nfsl_seqid);
                 /*
                  * When nfstest_outofseq is non-zero, a random subset of
                  * requests is sent with seqid + 1 instead.
                  */
4406             if (nfstest_outofseq &&
4407                 (arc4random() % nfstest_outofseq) == 0)
4408                     *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4409         }
4410         if (syscred)
4411                 nd->nd_flag |= ND_USEGSSNAME;
4412         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4413             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
             /* Unlike nfsrpc_locku(), a failed request skips the seqid update. */
4414         if (error)
4415                 return (error);
4416         if (newone)
4417             NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4418         NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4419         if (nd->nd_repstat == 0) {
                     /* Store the lock stateid returned by the server. */
4420                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4421                 lp->nfsl_stateid.seqid = *tl++;
4422                 lp->nfsl_stateid.other[0] = *tl++;
4423                 lp->nfsl_stateid.other[1] = *tl++;
4424                 lp->nfsl_stateid.other[2] = *tl;
4425         } else if (nd->nd_repstat == NFSERR_DENIED) {
                     /*
                      * Only the eighth word of the denial (the owner string
                      * length) is used, to skip over the rest of the reply.
                      */
4426                 NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4427                 size = fxdr_unsigned(int, *(tl + 7));
4428                 if (size < 0 || size > NFSV4_OPAQUELIMIT)
4429                         error = EBADRPC;
4430                 if (!error)
4431                         error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4432         } else if (nd->nd_repstat == NFSERR_STALESTATEID)
4433                 nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4434 nfsmout:
4435         m_freem(nd->nd_mrep);
4436         return (error);
4437 }
4438
4439 /*
4440  * nfs statfs rpc
4441  * (always called with the vp for the mount point)
      *
      * For an NFSv4 mount a Getattr is done instead, asking for the
      * NFSROOTFS attribute set when leasep is non-NULL and the NFSSTATFS set
      * otherwise.  nfsv4_loadattr() fills nap, sbp, fsp and leasep, and on
      * success the file system id from nap is saved in nmp->nm_fsid and
      * *attrflagp is set to 1.
      *
      * For other mounts an FSSTAT request is sent and the reply is decoded
      * into sbp: six 64-bit counters plus sf_invarsec for NFSv3, or five
      * 32-bit values otherwise.  fsp and leasep are not touched on this path.
      *
      * Returns the nfscl_request() error, the reply-parsing error, or
      * nd_repstat when the server reported a failure; 0 on success.
4442  */
4443 int
4444 nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4445     uint32_t *leasep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap,
4446     int *attrflagp, void *stuff)
4447 {
4448         u_int32_t *tl = NULL;
4449         struct nfsrv_descript nfsd, *nd = &nfsd;
4450         struct nfsmount *nmp;
4451         nfsattrbit_t attrbits;
4452         int error;
4453
4454         *attrflagp = 0;
4455         nmp = VFSTONFS(vp->v_mount);
4456         if (NFSHASNFSV4(nmp)) {
4457                 /*
4458                  * For V4, you actually do a getattr.
4459                  */
4460                 NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
4461                 if (leasep != NULL)
4462                         NFSROOTFS_GETATTRBIT(&attrbits);
4463                 else
4464                         NFSSTATFS_GETATTRBIT(&attrbits);
4465                 (void) nfsrv_putattrbit(nd, &attrbits);
4466                 nd->nd_flag |= ND_USEGSSNAME;
4467                 error = nfscl_request(nd, vp, p, cred, stuff);
4468                 if (error)
4469                         return (error);
4470                 if (nd->nd_repstat == 0) {
4471                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4472                             NULL, NULL, sbp, fsp, NULL, 0, NULL, leasep, NULL,
4473                             p, cred);
                             /* Remember the file system id for this mount. */
4474                         if (!error) {
4475                                 nmp->nm_fsid[0] = nap->na_filesid[0];
4476                                 nmp->nm_fsid[1] = nap->na_filesid[1];
4477                                 NFSSETHASSETFSID(nmp);
4478                                 *attrflagp = 1;
4479                         }
4480                 } else {
4481                         error = nd->nd_repstat;
4482                 }
4483                 if (error)
4484                         goto nfsmout;
4485         } else {
4486                 NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp, NULL);
4487                 error = nfscl_request(nd, vp, p, cred, stuff);
4488                 if (error)
4489                         return (error);
4490                 if (nd->nd_flag & ND_NFSV3) {
4491                         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4492                         if (error)
4493                                 goto nfsmout;
4494                 }
4495                 if (nd->nd_repstat) {
4496                         error = nd->nd_repstat;
4497                         goto nfsmout;
4498                 }
4499                 NFSM_DISSECT(tl, u_int32_t *,
4500                     NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4501         }
             /*
              * tl is set only by the NFSM_DISSECT in the non-V4 branch above.
              * NOTE(review): a successful V4 call also reaches this point
              * with tl still NULL; this relies on NFSHASNFSV3() being false
              * for a V4 mount -- confirm.
              */
4502         if (NFSHASNFSV3(nmp)) {
4503                 sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4504                 sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4505                 sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4506                 sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4507                 sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4508                 sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4509                 sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4510         } else if (NFSHASNFSV4(nmp) == 0) {
4511                 sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4512                 sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4513                 sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4514                 sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4515                 sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4516         }
4517 nfsmout:
4518         m_freem(nd->nd_mrep);
4519         return (error);
4520 }
4521
4522 /*
4523  * nfs pathconf rpc
      *
      * For an NFSv4 mount a Getattr for the NFSPATHCONF attribute set is done
      * and nfsv4_loadattr() fills nap and pc; *attrflagp is set to 1 on
      * success.  Before that, if the mount has NFSMNTP_FAKEROOTFH set and a
      * zero nm_fhsize, nfsrpc_getdirpath() is called first and any failure
      * from it is returned as EACCES.
      *
      * For other mounts a PATHCONF request is sent, the post-op attributes
      * are loaded through nfscl_postop_attr() and the six 32-bit values of
      * the reply are decoded into pc.
      *
      * Returns the nfscl_request() error, EACCES as described above, the
      * reply-parsing error, or nd_repstat; 0 on success.
4524  */
4525 int
4526 nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4527     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4528     void *stuff)
4529 {
4530         struct nfsrv_descript nfsd, *nd = &nfsd;
4531         struct nfsmount *nmp;
4532         u_int32_t *tl;
4533         nfsattrbit_t attrbits;
4534         int error;
4535         struct nfsnode *np;
4536
4537         *attrflagp = 0;
4538         nmp = VFSTONFS(vp->v_mount);
4539         if (NFSHASNFSV4(nmp)) {
4540                 np = VTONFS(vp);
4541                 if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
4542                     nmp->nm_fhsize == 0) {
4543                         /* Attempt to get the actual root file handle. */
4544                         error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
4545                             cred, p);
4546                         if (error != 0)
4547                                 return (EACCES);
                             /*
                              * NOTE(review): a handle length of NFSX_FHMAX + 1
                              * presumably marks the fake root file handle of
                              * vp -- confirm where n_fhp is set up.
                              */
4548                         if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
4549                                 nfscl_statfs(vp, cred, p);
4550                 }
4551                 /*
4552                  * For V4, you actually do a getattr.
4553                  */
4554                 NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
4555                 NFSPATHCONF_GETATTRBIT(&attrbits);
4556                 (void) nfsrv_putattrbit(nd, &attrbits);
4557                 nd->nd_flag |= ND_USEGSSNAME;
4558                 error = nfscl_request(nd, vp, p, cred, stuff);
4559                 if (error)
4560                         return (error);
4561                 if (nd->nd_repstat == 0) {
4562                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4563                             pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4564                             cred);
4565                         if (!error)
4566                                 *attrflagp = 1;
4567                 } else {
4568                         error = nd->nd_repstat;
4569                 }
4570         } else {
4571                 NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp, NULL);
4572                 error = nfscl_request(nd, vp, p, cred, stuff);
4573                 if (error)
4574                         return (error);
4575                 error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
                     /* A parsing error takes precedence over nd_repstat. */
4576                 if (nd->nd_repstat && !error)
4577                         error = nd->nd_repstat;
4578                 if (!error) {
4579                         NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4580                         pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4581                         pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4582                         pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4583                         pc->pc_chownrestricted =
4584                             fxdr_unsigned(u_int32_t, *tl++);
4585                         pc->pc_caseinsensitive =
4586                             fxdr_unsigned(u_int32_t, *tl++);
4587                         pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4588                 }
4589         }
4590 nfsmout:
4591         m_freem(nd->nd_mrep);
4592         return (error);
4593 }
4594
4595 /*
4596  * nfs version 3 fsinfo rpc call
4597  */
4598 int
4599 nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4600     NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
4601 {
4602         u_int32_t *tl;
4603         struct nfsrv_descript nfsd, *nd = &nfsd;
4604         int error;
4605
4606         *attrflagp = 0;
4607         NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp, NULL);
4608         error = nfscl_request(nd, vp, p, cred, stuff);
4609         if (error)
4610                 return (error);
4611         error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4612         if (nd->nd_repstat && !error)
4613                 error = nd->nd_repstat;
4614         if (!error) {
4615                 NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4616                 fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4617                 fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4618                 fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4619                 fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4620                 fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4621                 fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4622                 fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4623                 fsp->fs_maxfilesize = fxdr_hyper(tl);
4624                 tl += 2;
4625                 fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4626                 tl += 2;
4627                 fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4628         }
4629 nfsmout:
4630         m_freem(nd->nd_mrep);
4631         return (error);
4632 }
4633
4634 /*
4635  * This function performs the Renew RPC.
4636  */
4637 int
4638 nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4639     NFSPROC_T *p)
4640 {
4641         u_int32_t *tl;
4642         struct nfsrv_descript nfsd;
4643         struct nfsrv_descript *nd = &nfsd;
4644         struct nfsmount *nmp;
4645         int error;
4646         struct nfssockreq *nrp;
4647         struct nfsclsession *tsep;
4648
4649         nmp = clp->nfsc_nmp;
4650         if (nmp == NULL)
4651                 return (0);
4652         if (dsp == NULL)
4653                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
4654                     0, cred);
4655         else
4656                 nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4657                     &dsp->nfsclds_sess, 0, 0, NULL);
4658         if (!NFSHASNFSV4N(nmp)) {
4659                 /* NFSv4.1 just uses a Sequence Op and not a Renew. */
4660                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4661                 tsep = nfsmnt_mdssession(nmp);
4662                 *tl++ = tsep->nfsess_clientid.lval[0];
4663                 *tl = tsep->nfsess_clientid.lval[1];
4664         }
4665         nrp = NULL;
4666         if (dsp != NULL)
4667                 nrp = dsp->nfsclds_sockp;
4668         if (nrp == NULL)
4669                 /* If NULL, use the MDS socket. */
4670                 nrp = &nmp->nm_sockreq;
4671         nd->nd_flag |= ND_USEGSSNAME;
4672         if (dsp == NULL)
4673                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4674                     NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4675         else {
4676                 error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4677                     NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4678                 if (error == ENXIO)
4679                         nfscl_cancelreqs(dsp);
4680         }
4681         if (error)
4682                 return (error);
4683         error = nd->nd_repstat;
4684         m_freem(nd->nd_mrep);
4685         return (error);
4686 }
4687
4688 /*
4689  * This function performs the Releaselockowner RPC.
4690  */
4691 int
4692 nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4693     uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4694 {
4695         struct nfsrv_descript nfsd, *nd = &nfsd;
4696         u_int32_t *tl;
4697         int error;
4698         uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4699         struct nfsclsession *tsep;
4700
4701         if (NFSHASNFSV4N(nmp)) {
4702                 /* For NFSv4.1, do a FreeStateID. */
4703                 nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4704                     NULL, 0, 0, cred);
4705                 nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4706         } else {
4707                 nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4708                     NULL, 0, 0, NULL);
4709                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4710                 tsep = nfsmnt_mdssession(nmp);
4711                 *tl++ = tsep->nfsess_clientid.lval[0];
4712                 *tl = tsep->nfsess_clientid.lval[1];
4713                 NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4714                 NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4715                 (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4716         }
4717         nd->nd_flag |= ND_USEGSSNAME;
4718         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4719             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4720         if (error)
4721                 return (error);
4722         error = nd->nd_repstat;
4723         m_freem(nd->nd_mrep);
4724         return (error);
4725 }
4726
4727 /*
4728  * This function performs the Compound to get the mount pt FH.
4729  */
4730 int
4731 nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
4732     NFSPROC_T *p)
4733 {
4734         u_int32_t *tl;
4735         struct nfsrv_descript nfsd;
4736         struct nfsrv_descript *nd = &nfsd;
4737         u_char *cp, *cp2, *fhp;
4738         int error, cnt, len, setnil;
4739         u_int32_t *opcntp;
4740
4741         nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
4742             0, NULL);
4743         cp = dirpath;
4744         cnt = 0;
4745         do {
4746                 setnil = 0;
4747                 while (*cp == '/')
4748                         cp++;
4749                 cp2 = cp;
4750                 while (*cp2 != '\0' && *cp2 != '/')
4751                         cp2++;
4752                 if (*cp2 == '/') {
4753                         setnil = 1;
4754                         *cp2 = '\0';
4755                 }
4756                 if (cp2 != cp) {
4757                         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4758                         *tl = txdr_unsigned(NFSV4OP_LOOKUP);
4759                         nfsm_strtom(nd, cp, strlen(cp));
4760                         cnt++;
4761                 }
4762                 if (setnil)
4763                         *cp2++ = '/';
4764                 cp = cp2;
4765         } while (*cp != '\0');
4766         if (NFSHASNFSV4N(nmp))
4767                 /* Has a Sequence Op done by nfscl_reqstart(). */
4768                 *opcntp = txdr_unsigned(3 + cnt);
4769         else
4770                 *opcntp = txdr_unsigned(2 + cnt);
4771         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4772         *tl = txdr_unsigned(NFSV4OP_GETFH);
4773         nd->nd_flag |= ND_USEGSSNAME;
4774         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4775                 NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4776         if (error)
4777                 return (error);
4778         if (nd->nd_repstat == 0) {
4779                 NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
4780                 tl += (2 + 2 * cnt);
4781                 if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
4782                         len > NFSX_FHMAX) {
4783                         nd->nd_repstat = NFSERR_BADXDR;
4784                 } else {
4785                         fhp = malloc(len + 1, M_TEMP, M_WAITOK);
4786                         nd->nd_repstat = nfsrv_mtostr(nd, fhp, len);
4787                         if (nd->nd_repstat == 0) {
4788                                 NFSLOCKMNT(nmp);
4789                                 if (nmp->nm_fhsize == 0) {
4790                                         NFSBCOPY(fhp, nmp->nm_fh, len);
4791                                         nmp->nm_fhsize = len;
4792                                 }
4793                                 NFSUNLOCKMNT(nmp);
4794                         }
4795                         free(fhp, M_TEMP);
4796                 }
4797         }
4798         error = nd->nd_repstat;
4799 nfsmout:
4800         m_freem(nd->nd_mrep);
4801         return (error);
4802 }
4803
4804 /*
4805  * This function performs the Delegreturn RPC.
4806  */
4807 int
4808 nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
4809     struct nfsmount *nmp, NFSPROC_T *p, int syscred)
4810 {
4811         u_int32_t *tl;
4812         struct nfsrv_descript nfsd;
4813         struct nfsrv_descript *nd = &nfsd;
4814         int error;
4815
4816         nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
4817             dp->nfsdl_fhlen, NULL, NULL, 0, 0, cred);
4818         NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
4819         if (NFSHASNFSV4N(nmp))
4820                 *tl++ = 0;
4821         else
4822                 *tl++ = dp->nfsdl_stateid.seqid;
4823         *tl++ = dp->nfsdl_stateid.other[0];
4824         *tl++ = dp->nfsdl_stateid.other[1];
4825         *tl = dp->nfsdl_stateid.other[2];
4826         if (syscred)
4827                 nd->nd_flag |= ND_USEGSSNAME;
4828         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4829             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4830         if (error)
4831                 return (error);
4832         error = nd->nd_repstat;
4833         m_freem(nd->nd_mrep);
4834         return (error);
4835 }
4836
4837 /*
4838  * nfs getacl call.
4839  */
4840 int
4841 nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4842     struct acl *aclp, void *stuff)
4843 {
4844         struct nfsrv_descript nfsd, *nd = &nfsd;
4845         int error;
4846         nfsattrbit_t attrbits;
4847         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4848
4849         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4850                 return (EOPNOTSUPP);
4851         NFSCL_REQSTART(nd, NFSPROC_GETACL, vp, cred);
4852         NFSZERO_ATTRBIT(&attrbits);
4853         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4854         (void) nfsrv_putattrbit(nd, &attrbits);
4855         error = nfscl_request(nd, vp, p, cred, stuff);
4856         if (error)
4857                 return (error);
4858         if (!nd->nd_repstat)
4859                 error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
4860                     NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
4861         else
4862                 error = nd->nd_repstat;
4863         m_freem(nd->nd_mrep);
4864         return (error);
4865 }
4866
4867 /*
4868  * nfs setacl call.
4869  */
4870 int
4871 nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4872     struct acl *aclp, void *stuff)
4873 {
4874         int error;
4875         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4876
4877         if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4878                 return (EOPNOTSUPP);
4879         error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff);
4880         return (error);
4881 }
4882
4883 /*
4884  * nfs setacl call.
4885  */
4886 static int
4887 nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4888     struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff)
4889 {
4890         struct nfsrv_descript nfsd, *nd = &nfsd;
4891         int error;
4892         nfsattrbit_t attrbits;
4893         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4894
4895         if (!NFSHASNFSV4(nmp))
4896                 return (EOPNOTSUPP);
4897         NFSCL_REQSTART(nd, NFSPROC_SETACL, vp, cred);
4898         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
4899         NFSZERO_ATTRBIT(&attrbits);
4900         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4901         (void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0,
4902             &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
4903         error = nfscl_request(nd, vp, p, cred, stuff);
4904         if (error)
4905                 return (error);
4906         /* Don't care about the pre/postop attributes */
4907         m_freem(nd->nd_mrep);
4908         return (nd->nd_repstat);
4909 }
4910
4911 /*
4912  * Do the NFSv4.1 Exchange ID.
4913  */
4914 int
4915 nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
4916     struct nfssockreq *nrp, int minorvers, uint32_t exchflags,
4917     struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p)
4918 {
4919         uint32_t *tl, v41flags;
4920         struct nfsrv_descript nfsd;
4921         struct nfsrv_descript *nd = &nfsd;
4922         struct nfsclds *dsp;
4923         struct timespec verstime;
4924         int error, len;
4925
4926         *dspp = NULL;
4927         if (minorvers == 0)
4928                 minorvers = nmp->nm_minorvers;
4929         nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL,
4930             NFS_VER4, minorvers, NULL);
4931         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4932         *tl++ = txdr_unsigned(nfsboottime.tv_sec);      /* Client owner */
4933         *tl = txdr_unsigned(clp->nfsc_rev);
4934         (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
4935
4936         NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
4937         *tl++ = txdr_unsigned(exchflags);
4938         *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
4939
4940         /* Set the implementation id4 */
4941         *tl = txdr_unsigned(1);
4942         (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
4943         (void) nfsm_strtom(nd, version, strlen(version));
4944         NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
4945         verstime.tv_sec = 1293840000;           /* Jan 1, 2011 */
4946         verstime.tv_nsec = 0;
4947         txdr_nfsv4time(&verstime, tl);
4948         nd->nd_flag |= ND_USEGSSNAME;
4949         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4950             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4951         NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
4952             (int)nd->nd_repstat);
4953         if (error != 0)
4954                 return (error);
4955         if (nd->nd_repstat == 0) {
4956                 NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
4957                 len = fxdr_unsigned(int, *(tl + 7));
4958                 if (len < 0 || len > NFSV4_OPAQUELIMIT) {
4959                         error = NFSERR_BADXDR;
4960                         goto nfsmout;
4961                 }
4962                 dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
4963                     M_WAITOK | M_ZERO);
4964                 dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
4965                 dsp->nfsclds_servownlen = len;
4966                 dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
4967                 dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
4968                 dsp->nfsclds_sess.nfsess_sequenceid =
4969                     fxdr_unsigned(uint32_t, *tl++);
4970                 v41flags = fxdr_unsigned(uint32_t, *tl);
4971                 if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
4972                     NFSHASPNFSOPT(nmp)) {
4973                         NFSCL_DEBUG(1, "set PNFS\n");
4974                         NFSLOCKMNT(nmp);
4975                         nmp->nm_state |= NFSSTA_PNFS;
4976                         NFSUNLOCKMNT(nmp);
4977                         dsp->nfsclds_flags |= NFSCLDS_MDS;
4978                 }
4979                 if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
4980                         dsp->nfsclds_flags |= NFSCLDS_DS;
4981                 if (minorvers == NFSV42_MINORVERSION)
4982                         dsp->nfsclds_flags |= NFSCLDS_MINORV2;
4983                 if (len > 0)
4984                         nd->nd_repstat = nfsrv_mtostr(nd,
4985                             dsp->nfsclds_serverown, len);
4986                 if (nd->nd_repstat == 0) {
4987                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
4988                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
4989                             NULL, MTX_DEF);
4990                         nfscl_initsessionslots(&dsp->nfsclds_sess);
4991                         *dspp = dsp;
4992                 } else
4993                         free(dsp, M_NFSCLDS);
4994         }
4995         error = nd->nd_repstat;
4996 nfsmout:
4997         m_freem(nd->nd_mrep);
4998         return (error);
4999 }
5000
5001 /*
5002  * Do the NFSv4.1 Create Session.
5003  */
5004 int
5005 nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
5006     struct nfssockreq *nrp, struct nfsclds *dsp, uint32_t sequenceid, int mds,
5007     struct ucred *cred, NFSPROC_T *p)
5008 {
5009         uint32_t crflags, maxval, *tl;
5010         struct nfsrv_descript nfsd;
5011         struct nfsrv_descript *nd = &nfsd;
5012         int error, irdcnt, minorvers;
5013
5014         /* Make sure nm_rsize, nm_wsize is set. */
5015         if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
5016                 nmp->nm_rsize = NFS_MAXBSIZE;
5017         if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
5018                 nmp->nm_wsize = NFS_MAXBSIZE;
5019         if (dsp == NULL)
5020                 minorvers = nmp->nm_minorvers;
5021         else if ((dsp->nfsclds_flags & NFSCLDS_MINORV2) != 0)
5022                 minorvers = NFSV42_MINORVERSION;
5023         else
5024                 minorvers = NFSV41_MINORVERSION;
5025         nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL,
5026             NFS_VER4, minorvers, NULL);
5027         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5028         *tl++ = sep->nfsess_clientid.lval[0];
5029         *tl++ = sep->nfsess_clientid.lval[1];
5030         *tl++ = txdr_unsigned(sequenceid);
5031         crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
5032         if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
5033                 crflags |= NFSV4CRSESS_CONNBACKCHAN;
5034         *tl = txdr_unsigned(crflags);
5035
5036         /* Fill in fore channel attributes. */
5037         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5038         *tl++ = 0;                              /* Header pad size */
5039         if ((nd->nd_flag & ND_NFSV42) != 0 && mds != 0 && sb_max_adj >=
5040             nmp->nm_wsize && sb_max_adj >= nmp->nm_rsize) {
5041                 /*
5042                  * NFSv4.2 Extended Attribute operations may want to do
5043                  * requests/replies that are larger than nm_rsize/nm_wsize.
5044                  */
5045                 *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
5046                 *tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
5047         } else {
5048                 *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);
5049                 *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);
5050         }
5051         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
5052         *tl++ = txdr_unsigned(20);              /* Max operations */
5053         *tl++ = txdr_unsigned(64);              /* Max slots */
5054         *tl = 0;                                /* No rdma ird */
5055
5056         /* Fill in back channel attributes. */
5057         NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5058         *tl++ = 0;                              /* Header pad size */
5059         *tl++ = txdr_unsigned(10000);           /* Max request size */
5060         *tl++ = txdr_unsigned(10000);           /* Max response size */
5061         *tl++ = txdr_unsigned(4096);            /* Max response size cached */
5062         *tl++ = txdr_unsigned(4);               /* Max operations */
5063         *tl++ = txdr_unsigned(NFSV4_CBSLOTS);   /* Max slots */
5064         *tl = 0;                                /* No rdma ird */
5065
5066         NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
5067         *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */
5068
5069         /* Allow AUTH_SYS callbacks as uid, gid == 0. */
5070         *tl++ = txdr_unsigned(1);               /* Auth_sys only */
5071         *tl++ = txdr_unsigned(AUTH_SYS);        /* AUTH_SYS type */
5072         *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
5073         *tl++ = 0;                              /* Null machine name */
5074         *tl++ = 0;                              /* Uid == 0 */
5075         *tl++ = 0;                              /* Gid == 0 */
5076         *tl = 0;                                /* No additional gids */
5077         nd->nd_flag |= ND_USEGSSNAME;
5078         error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
5079             NFS_VER4, NULL, 1, NULL, NULL);
5080         if (error != 0)
5081                 return (error);
5082         if (nd->nd_repstat == 0) {
5083                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
5084                     2 * NFSX_UNSIGNED);
5085                 bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
5086                 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
5087                 sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
5088                 crflags = fxdr_unsigned(uint32_t, *tl);
5089                 if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
5090                         NFSLOCKMNT(nmp);
5091                         nmp->nm_state |= NFSSTA_SESSPERSIST;
5092                         NFSUNLOCKMNT(nmp);
5093                 }
5094
5095                 /* Get the fore channel slot count. */
5096                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5097                 tl++;                   /* Skip the header pad size. */
5098
5099                 /* Make sure nm_wsize is small enough. */
5100                 maxval = fxdr_unsigned(uint32_t, *tl++);
5101                 while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
5102                         if (nmp->nm_wsize > 8096)
5103                                 nmp->nm_wsize /= 2;
5104                         else
5105                                 break;
5106                 }
5107                 sep->nfsess_maxreq = maxval;
5108
5109                 /* Make sure nm_rsize is small enough. */
5110                 maxval = fxdr_unsigned(uint32_t, *tl++);
5111                 while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
5112                         if (nmp->nm_rsize > 8096)
5113                                 nmp->nm_rsize /= 2;
5114                         else
5115                                 break;
5116                 }
5117                 sep->nfsess_maxresp = maxval;
5118
5119                 sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
5120                 tl++;
5121                 sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
5122                 NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
5123                 irdcnt = fxdr_unsigned(int, *tl);
5124                 if (irdcnt < 0 || irdcnt > 1) {
5125                         error = NFSERR_BADXDR;
5126                         goto nfsmout;
5127                 }
5128                 if (irdcnt > 0)
5129                         NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
5130
5131                 /* and the back channel slot count. */
5132                 NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5133                 tl += 5;
5134                 sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
5135                 NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
5136         }
5137         error = nd->nd_repstat;
5138 nfsmout:
5139         m_freem(nd->nd_mrep);
5140         return (error);
5141 }
5142
5143 /*
5144  * Do the NFSv4.1 Destroy Client.
5145  */
5146 int
5147 nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
5148     struct ucred *cred, NFSPROC_T *p)
5149 {
5150         uint32_t *tl;
5151         struct nfsrv_descript nfsd;
5152         struct nfsrv_descript *nd = &nfsd;
5153         int error;
5154         struct nfsclsession *tsep;
5155
5156         nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0,
5157             0, NULL);
5158         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5159         tsep = nfsmnt_mdssession(nmp);
5160         *tl++ = tsep->nfsess_clientid.lval[0];
5161         *tl = tsep->nfsess_clientid.lval[1];
5162         nd->nd_flag |= ND_USEGSSNAME;
5163         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5164             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5165         if (error != 0)
5166                 return (error);
5167         error = nd->nd_repstat;
5168         m_freem(nd->nd_mrep);
5169         return (error);
5170 }
5171
5172 /*
5173  * Do the NFSv4.1 LayoutGet.
5174  */
5175 static int
5176 nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
5177     uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
5178     int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
5179     struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p,
5180     void *stuff)
5181 {
5182         struct nfsrv_descript nfsd, *nd = &nfsd;
5183         int error;
5184
5185         nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
5186             0, cred);
5187         nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
5188             layouttype, layoutlen, 0);
5189         nd->nd_flag |= ND_USEGSSNAME;
5190         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5191             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5192         NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
5193         if (error != 0)
5194                 return (error);
5195         if (nd->nd_repstat == 0)
5196                 error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep,
5197                     flhp);
5198         if (error == 0 && nd->nd_repstat != 0)
5199                 error = nd->nd_repstat;
5200         m_freem(nd->nd_mrep);
5201         return (error);
5202 }
5203
5204 /*
5205  * Do the NFSv4.1 Get Device Info.
5206  */
5207 int
5208 nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
5209     uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
5210     NFSPROC_T *p)
5211 {
5212         uint32_t cnt, *tl, vers, minorvers;
5213         struct nfsrv_descript nfsd;
5214         struct nfsrv_descript *nd = &nfsd;
5215         struct sockaddr_in sin, ssin;
5216         struct sockaddr_in6 sin6, ssin6;
5217         struct nfsclds *dsp = NULL, **dspp, **gotdspp;
5218         struct nfscldevinfo *ndi;
5219         int addrcnt = 0, bitcnt, error, gotminor, gotvers, i, isudp, j;
5220         int stripecnt;
5221         uint8_t stripeindex;
5222         sa_family_t af, safilled;
5223
5224         ssin.sin_port = 0;              /* To shut up compiler. */
5225         ssin.sin_addr.s_addr = 0;       /* ditto */
5226         *ndip = NULL;
5227         ndi = NULL;
5228         gotdspp = NULL;
5229         nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0,
5230             0, cred);
5231         NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5232         NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
5233         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5234         *tl++ = txdr_unsigned(layouttype);
5235         *tl++ = txdr_unsigned(100000);
5236         if (notifybitsp != NULL && *notifybitsp != 0) {
5237                 *tl = txdr_unsigned(1);         /* One word of bits. */
5238                 NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5239                 *tl = txdr_unsigned(*notifybitsp);
5240         } else
5241                 *tl = txdr_unsigned(0);
5242         nd->nd_flag |= ND_USEGSSNAME;
5243         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5244             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5245         if (error != 0)
5246                 return (error);
5247         if (nd->nd_repstat == 0) {
5248                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5249                 if (layouttype != fxdr_unsigned(int, *tl))
5250                         printf("EEK! devinfo layout type not same!\n");
5251                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
5252                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5253                         stripecnt = fxdr_unsigned(int, *tl);
5254                         NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
5255                         if (stripecnt < 1 || stripecnt > 4096) {
5256                                 printf("pNFS File layout devinfo stripecnt %d:"
5257                                     " out of range\n", stripecnt);
5258                                 error = NFSERR_BADXDR;
5259                                 goto nfsmout;
5260                         }
5261                         NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
5262                             NFSX_UNSIGNED);
5263                         addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
5264                         NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
5265                         if (addrcnt < 1 || addrcnt > 128) {
5266                                 printf("NFS devinfo addrcnt %d: out of range\n",
5267                                     addrcnt);
5268                                 error = NFSERR_BADXDR;
5269                                 goto nfsmout;
5270                         }
5271
5272                         /*
5273                          * Now we know how many stripe indices and addresses, so
5274                          * we can allocate the structure the correct size.
5275                          */
5276                         i = (stripecnt * sizeof(uint8_t)) /
5277                             sizeof(struct nfsclds *) + 1;
5278                         NFSCL_DEBUG(4, "stripeindices=%d\n", i);
5279                         ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
5280                             sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
5281                             M_ZERO);
5282                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5283                             NFSX_V4DEVICEID);
5284                         ndi->nfsdi_refcnt = 0;
5285                         ndi->nfsdi_flags = NFSDI_FILELAYOUT;
5286                         ndi->nfsdi_stripecnt = stripecnt;
5287                         ndi->nfsdi_addrcnt = addrcnt;
5288                         /* Fill in the stripe indices. */
5289                         for (i = 0; i < stripecnt; i++) {
5290                                 stripeindex = fxdr_unsigned(uint8_t, *tl++);
5291                                 NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
5292                                 if (stripeindex >= addrcnt) {
5293                                         printf("pNFS File Layout devinfo"
5294                                             " stripeindex %d: too big\n",
5295                                             (int)stripeindex);
5296                                         error = NFSERR_BADXDR;
5297                                         goto nfsmout;
5298                                 }
5299                                 nfsfldi_setstripeindex(ndi, i, stripeindex);
5300                         }
5301                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
5302                         /* For Flex File, we only get one address list. */
5303                         ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
5304                             M_NFSDEVINFO, M_WAITOK | M_ZERO);
5305                         NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5306                             NFSX_V4DEVICEID);
5307                         ndi->nfsdi_refcnt = 0;
5308                         ndi->nfsdi_flags = NFSDI_FLEXFILE;
5309                         addrcnt = ndi->nfsdi_addrcnt = 1;
5310                 }
5311
5312                 /* Now, dissect the server address(es). */
5313                 safilled = AF_UNSPEC;
5314                 for (i = 0; i < addrcnt; i++) {
5315                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5316                         cnt = fxdr_unsigned(uint32_t, *tl);
5317                         if (cnt == 0) {
5318                                 printf("NFS devinfo 0 len addrlist\n");
5319                                 error = NFSERR_BADXDR;
5320                                 goto nfsmout;
5321                         }
5322                         dspp = nfsfldi_addr(ndi, i);
5323                         safilled = AF_UNSPEC;
5324                         for (j = 0; j < cnt; j++) {
5325                                 error = nfsv4_getipaddr(nd, &sin, &sin6, &af,
5326                                     &isudp);
5327                                 if (error != 0 && error != EPERM) {
5328                                         error = NFSERR_BADXDR;
5329                                         goto nfsmout;
5330                                 }
5331                                 if (error == 0 && isudp == 0) {
5332                                         /*
5333                                          * The priority is:
5334                                          * - Same address family.
5335                                          * Save the address and dspp, so that
5336                                          * the connection can be done after
5337                                          * parsing is complete.
5338                                          */
5339                                         if (safilled == AF_UNSPEC ||
5340                                             (af == nmp->nm_nam->sa_family &&
5341                                              safilled != nmp->nm_nam->sa_family)
5342                                            ) {
5343                                                 if (af == AF_INET)
5344                                                         ssin = sin;
5345                                                 else
5346                                                         ssin6 = sin6;
5347                                                 safilled = af;
5348                                                 gotdspp = dspp;
5349                                         }
5350                                 }
5351                         }
5352                 }
5353
5354                 gotvers = NFS_VER4;     /* Default NFSv4.1 for File Layout. */
5355                 gotminor = NFSV41_MINORVERSION;
5356                 /* For Flex File, we will take one of the versions to use. */
5357                 if (layouttype == NFSLAYOUT_FLEXFILE) {
5358                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5359                         j = fxdr_unsigned(int, *tl);
5360                         if (j < 1 || j > NFSDEV_MAXVERS) {
5361                                 printf("pNFS: too many versions\n");
5362                                 error = NFSERR_BADXDR;
5363                                 goto nfsmout;
5364                         }
5365                         gotvers = 0;
5366                         gotminor = 0;
5367                         for (i = 0; i < j; i++) {
5368                                 NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
5369                                 vers = fxdr_unsigned(uint32_t, *tl++);
5370                                 minorvers = fxdr_unsigned(uint32_t, *tl++);
5371                                 if (vers == NFS_VER3)
5372                                         minorvers = 0;
5373                                 if ((vers == NFS_VER4 && ((minorvers ==
5374                                     NFSV41_MINORVERSION && gotminor == 0) ||
5375                                     minorvers == NFSV42_MINORVERSION)) ||
5376                                     (vers == NFS_VER3 && gotvers == 0)) {
5377                                         gotvers = vers;
5378                                         gotminor = minorvers;
5379                                         /* We'll take this one. */
5380                                         ndi->nfsdi_versindex = i;
5381                                         ndi->nfsdi_vers = vers;
5382                                         ndi->nfsdi_minorvers = minorvers;
5383                                         ndi->nfsdi_rsize = fxdr_unsigned(
5384                                             uint32_t, *tl++);
5385                                         ndi->nfsdi_wsize = fxdr_unsigned(
5386                                             uint32_t, *tl++);
5387                                         if (*tl == newnfs_true)
5388                                                 ndi->nfsdi_flags |=
5389                                                     NFSDI_TIGHTCOUPLED;
5390                                         else
5391                                                 ndi->nfsdi_flags &=
5392                                                     ~NFSDI_TIGHTCOUPLED;
5393                                 }
5394                         }
5395                         if (gotvers == 0) {
5396                                 printf("pNFS: no NFSv3, NFSv4.1 or NFSv4.2\n");
5397                                 error = NFSERR_BADXDR;
5398                                 goto nfsmout;
5399                         }
5400                 }
5401
5402                 /* And the notify bits. */
5403                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5404                 bitcnt = fxdr_unsigned(int, *tl);
5405                 if (bitcnt > 0) {
5406                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5407                         if (notifybitsp != NULL)
5408                                 *notifybitsp =
5409                                     fxdr_unsigned(uint32_t, *tl);
5410                 }
5411                 if (safilled != AF_UNSPEC) {
5412                         KASSERT(ndi != NULL, ("ndi is NULL"));
5413                         *ndip = ndi;
5414                 } else
5415                         error = EPERM;
5416                 if (error == 0) {
5417                         /*
5418                          * Now we can do a TCP connection for the correct
5419                          * NFS version and IP address.
5420                          */
5421                         error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled,
5422                             gotvers, gotminor, &dsp, p);
5423                 }
5424                 if (error == 0) {
5425                         KASSERT(gotdspp != NULL, ("gotdspp is NULL"));
5426                         *gotdspp = dsp;
5427                 }
5428         }
5429         if (nd->nd_repstat != 0 && error == 0)
5430                 error = nd->nd_repstat;
5431 nfsmout:
5432         if (error != 0 && ndi != NULL)
5433                 nfscl_freedevinfo(ndi);
5434         m_freem(nd->nd_mrep);
5435         return (error);
5436 }
5437
5438 /*
5439  * Do the NFSv4.1 LayoutCommit.
5440  */
5441 int
5442 nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5443     uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5444     int layouttype, struct ucred *cred, NFSPROC_T *p, void *stuff)
5445 {
5446         uint32_t *tl;
5447         struct nfsrv_descript nfsd, *nd = &nfsd;
5448         int error;
5449
5450         nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL,
5451             0, 0, cred);
5452         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5453             NFSX_STATEID);
5454         txdr_hyper(off, tl);
5455         tl += 2;
5456         txdr_hyper(len, tl);
5457         tl += 2;
5458         if (reclaim != 0)
5459                 *tl++ = newnfs_true;
5460         else
5461                 *tl++ = newnfs_false;
5462         *tl++ = txdr_unsigned(stateidp->seqid);
5463         *tl++ = stateidp->other[0];
5464         *tl++ = stateidp->other[1];
5465         *tl++ = stateidp->other[2];
5466         *tl++ = newnfs_true;
5467         if (lastbyte < off)
5468                 lastbyte = off;
5469         else if (lastbyte >= (off + len))
5470                 lastbyte = off + len - 1;
5471         txdr_hyper(lastbyte, tl);
5472         tl += 2;
5473         *tl++ = newnfs_false;
5474         *tl++ = txdr_unsigned(layouttype);
5475         /* All supported layouts are 0 length. */
5476         *tl = txdr_unsigned(0);
5477         nd->nd_flag |= ND_USEGSSNAME;
5478         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5479             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5480         if (error != 0)
5481                 return (error);
5482         error = nd->nd_repstat;
5483         m_freem(nd->nd_mrep);
5484         return (error);
5485 }
5486
5487 /*
5488  * Do the NFSv4.1 LayoutReturn.
5489  */
5490 int
5491 nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5492     int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5493     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5494     uint32_t stat, uint32_t op, char *devid)
5495 {
5496         uint32_t *tl;
5497         struct nfsrv_descript nfsd, *nd = &nfsd;
5498         uint64_t tu64;
5499         int error;
5500
5501         nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL,
5502             0, 0, cred);
5503         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5504         if (reclaim != 0)
5505                 *tl++ = newnfs_true;
5506         else
5507                 *tl++ = newnfs_false;
5508         *tl++ = txdr_unsigned(layouttype);
5509         *tl++ = txdr_unsigned(iomode);
5510         *tl = txdr_unsigned(layoutreturn);
5511         if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5512                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5513                     NFSX_UNSIGNED);
5514                 txdr_hyper(offset, tl);
5515                 tl += 2;
5516                 txdr_hyper(len, tl);
5517                 tl += 2;
5518                 NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5519                 *tl++ = txdr_unsigned(stateidp->seqid);
5520                 *tl++ = stateidp->other[0];
5521                 *tl++ = stateidp->other[1];
5522                 *tl++ = stateidp->other[2];
5523                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
5524                         *tl = txdr_unsigned(0);
5525                 else if (layouttype == NFSLAYOUT_FLEXFILE) {
5526                         if (stat != 0) {
5527                                 *tl = txdr_unsigned(2 * NFSX_HYPER +
5528                                     NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5529                                     NFSX_UNSIGNED);
5530                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER +
5531                                     NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5532                                     NFSX_UNSIGNED);
5533                                 *tl++ = txdr_unsigned(1);       /* One error. */
5534                                 tu64 = 0;                       /* Offset. */
5535                                 txdr_hyper(tu64, tl); tl += 2;
5536                                 tu64 = UINT64_MAX;              /* Length. */
5537                                 txdr_hyper(tu64, tl); tl += 2;
5538                                 NFSBCOPY(stateidp, tl, NFSX_STATEID);
5539                                 tl += (NFSX_STATEID / NFSX_UNSIGNED);
5540                                 *tl++ = txdr_unsigned(1);       /* One error. */
5541                                 NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5542                                 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5543                                 *tl++ = txdr_unsigned(stat);
5544                                 *tl++ = txdr_unsigned(op);
5545                         } else {
5546                                 *tl = txdr_unsigned(2 * NFSX_UNSIGNED);
5547                                 NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5548                                 /* No ioerrs. */
5549                                 *tl++ = 0;
5550                         }
5551                         *tl = 0;        /* No stats yet. */
5552                 }
5553         }
5554         nd->nd_flag |= ND_USEGSSNAME;
5555         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5556             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5557         if (error != 0)
5558                 return (error);
5559         if (nd->nd_repstat == 0) {
5560                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5561                 if (*tl != 0) {
5562                         NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5563                         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5564                         stateidp->other[0] = *tl++;
5565                         stateidp->other[1] = *tl++;
5566                         stateidp->other[2] = *tl;
5567                 }
5568         } else
5569                 error = nd->nd_repstat;
5570 nfsmout:
5571         m_freem(nd->nd_mrep);
5572         return (error);
5573 }
5574
5575 /*
5576  * Do the NFSv4.2 LayoutError.
5577  */
5578 static int
5579 nfsrpc_layouterror(struct nfsmount *nmp, uint8_t *fh, int fhlen, uint64_t offset,
5580     uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5581     uint32_t stat, uint32_t op, char *devid)
5582 {
5583         uint32_t *tl;
5584         struct nfsrv_descript nfsd, *nd = &nfsd;
5585         int error;
5586
5587         nfscl_reqstart(nd, NFSPROC_LAYOUTERROR, nmp, fh, fhlen, NULL, NULL,
5588             0, 0, cred);
5589         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5590             NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5591         txdr_hyper(offset, tl); tl += 2;
5592         txdr_hyper(len, tl); tl += 2;
5593         *tl++ = txdr_unsigned(stateidp->seqid);
5594         *tl++ = stateidp->other[0];
5595         *tl++ = stateidp->other[1];
5596         *tl++ = stateidp->other[2];
5597         *tl++ = txdr_unsigned(1);
5598         NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5599         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5600         *tl++ = txdr_unsigned(stat);
5601         *tl = txdr_unsigned(op);
5602         nd->nd_flag |= ND_USEGSSNAME;
5603         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5604             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5605         if (error != 0)
5606                 return (error);
5607         if (nd->nd_repstat != 0)
5608                 error = nd->nd_repstat;
5609         m_freem(nd->nd_mrep);
5610         return (error);
5611 }
5612
5613 /*
5614  * Acquire a layout and devinfo, if possible. The caller must have acquired
5615  * a reference count on the nfsclclient structure before calling this.
5616  * Return the layout in lypp with a reference count on it, if successful.
5617  */
5618 static int
5619 nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5620     int iomode, uint32_t rw, uint32_t *notifybitsp, nfsv4stateid_t *stateidp,
5621     uint64_t off, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5622 {
5623         struct nfscllayout *lyp;
5624         struct nfsclflayout *flp;
5625         struct nfsclflayouthead flh;
5626         int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
5627         nfsv4stateid_t stateid;
5628         struct nfsclsession *tsep;
5629
5630         *lypp = NULL;
5631         if (NFSHASFLEXFILE(nmp))
5632                 layouttype = NFSLAYOUT_FLEXFILE;
5633         else
5634                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
5635         /*
5636          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5637          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5638          * flp == NULL.
5639          */
5640         lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5641             off, rw, &flp, &recalled);
5642         islocked = 0;
5643         if (lyp == NULL || flp == NULL) {
5644                 if (recalled != 0)
5645                         return (EIO);
5646                 LIST_INIT(&flh);
5647                 tsep = nfsmnt_mdssession(nmp);
5648                 layoutlen = tsep->nfsess_maxcache -
5649                     (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5650                 if (lyp == NULL) {
5651                         stateid.seqid = 0;
5652                         stateid.other[0] = stateidp->other[0];
5653                         stateid.other[1] = stateidp->other[1];
5654                         stateid.other[2] = stateidp->other[2];
5655                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5656                             nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5657                             (uint64_t)0, layouttype, layoutlen, &stateid,
5658                             &retonclose, &flh, cred, p, NULL);
5659                 } else {
5660                         islocked = 1;
5661                         stateid.seqid = lyp->nfsly_stateid.seqid;
5662                         stateid.other[0] = lyp->nfsly_stateid.other[0];
5663                         stateid.other[1] = lyp->nfsly_stateid.other[1];
5664                         stateid.other[2] = lyp->nfsly_stateid.other[2];
5665                         error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5666                             nfhp->nfh_len, iomode, off, UINT64_MAX,
5667                             (uint64_t)0, layouttype, layoutlen, &stateid,
5668                             &retonclose, &flh, cred, p, NULL);
5669                 }
5670                 error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5671                     nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5672                     &flh, layouttype, error, NULL, cred, p);
5673                 if (error == 0)
5674                         *lypp = lyp;
5675                 else if (islocked != 0)
5676                         nfscl_rellayout(lyp, 1);
5677         } else
5678                 *lypp = lyp;
5679         return (error);
5680 }
5681
5682 /*
5683  * Do a TCP connection plus exchange id and create session.
5684  * If successful, a "struct nfsclds" is linked into the list for the
5685  * mount point and a pointer to it is returned.
5686  */
5687 static int
5688 nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
5689     struct sockaddr_in6 *sin6, sa_family_t af, int vers, int minorvers,
5690     struct nfsclds **dspp, NFSPROC_T *p)
5691 {
5692         struct sockaddr_in *msad, *sad;
5693         struct sockaddr_in6 *msad6, *sad6;
5694         struct nfsclclient *clp;
5695         struct nfssockreq *nrp;
5696         struct nfsclds *dsp, *tdsp;
5697         int error, firsttry;
5698         enum nfsclds_state retv;
5699         uint32_t sequenceid = 0;
5700
5701         KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5702             ("nfsrpc_fillsa: NULL nr_cred"));
5703         NFSLOCKCLSTATE();
5704         clp = nmp->nm_clp;
5705         NFSUNLOCKCLSTATE();
5706         if (clp == NULL)
5707                 return (EPERM);
5708         if (af == AF_INET) {
5709                 NFSLOCKMNT(nmp);
5710                 /*
5711                  * Check to see if we already have a session for this
5712                  * address that is usable for a DS.
5713                  * Note that the MDS's address is in a different place
5714                  * than the sessions already acquired for DS's.
5715                  */
5716                 msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5717                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
5718                 while (tdsp != NULL) {
5719                         if (msad != NULL && msad->sin_family == AF_INET &&
5720                             sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
5721                             sin->sin_port == msad->sin_port &&
5722                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5723                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
5724                                 *dspp = tdsp;
5725                                 NFSUNLOCKMNT(nmp);
5726                                 NFSCL_DEBUG(4, "fnd same addr\n");
5727                                 return (0);
5728                         }
5729                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5730                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5731                                 msad = (struct sockaddr_in *)
5732                                     tdsp->nfsclds_sockp->nr_nam;
5733                         else
5734                                 msad = NULL;
5735                 }
5736                 NFSUNLOCKMNT(nmp);
5737
5738                 /* No IP address match, so look for new/trunked one. */
5739                 sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
5740                 sad->sin_len = sizeof(*sad);
5741                 sad->sin_family = AF_INET;
5742                 sad->sin_port = sin->sin_port;
5743                 sad->sin_addr.s_addr = sin->sin_addr.s_addr;
5744                 nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5745                 nrp->nr_nam = (struct sockaddr *)sad;
5746         } else if (af == AF_INET6) {
5747                 NFSLOCKMNT(nmp);
5748                 /*
5749                  * Check to see if we already have a session for this
5750                  * address that is usable for a DS.
5751                  * Note that the MDS's address is in a different place
5752                  * than the sessions already acquired for DS's.
5753                  */
5754                 msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
5755                 tdsp = TAILQ_FIRST(&nmp->nm_sess);
5756                 while (tdsp != NULL) {
5757                         if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
5758                             IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
5759                             &msad6->sin6_addr) &&
5760                             sin6->sin6_port == msad6->sin6_port &&
5761                             (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5762                             tdsp->nfsclds_sess.nfsess_defunct == 0) {
5763                                 *dspp = tdsp;
5764                                 NFSUNLOCKMNT(nmp);
5765                                 return (0);
5766                         }
5767                         tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5768                         if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5769                                 msad6 = (struct sockaddr_in6 *)
5770                                     tdsp->nfsclds_sockp->nr_nam;
5771                         else
5772                                 msad6 = NULL;
5773                 }
5774                 NFSUNLOCKMNT(nmp);
5775
5776                 /* No IP address match, so look for new/trunked one. */
5777                 sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
5778                 sad6->sin6_len = sizeof(*sad6);
5779                 sad6->sin6_family = AF_INET6;
5780                 sad6->sin6_port = sin6->sin6_port;
5781                 NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
5782                     sizeof(struct in6_addr));
5783                 nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5784                 nrp->nr_nam = (struct sockaddr *)sad6;
5785         } else
5786                 return (EPERM);
5787
5788         nrp->nr_sotype = SOCK_STREAM;
5789         mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
5790         nrp->nr_prog = NFS_PROG;
5791         nrp->nr_vers = vers;
5792
5793         /*
5794          * Use the credentials that were used for the mount, which are
5795          * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
5796          * Ref. counting the credentials with crhold() is probably not
5797          * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
5798          * unmount, but I did it anyhow.
5799          */
5800         nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
5801         error = newnfs_connect(nmp, nrp, NULL, p, 0, false, &nrp->nr_client);
5802         NFSCL_DEBUG(3, "DS connect=%d\n", error);
5803
5804         dsp = NULL;
5805         /* Now, do the exchangeid and create session. */
5806         if (error == 0) {
5807                 if (vers == NFS_VER4) {
5808                         firsttry = 0;
5809                         do {
5810                                 error = nfsrpc_exchangeid(nmp, clp, nrp, 
5811                                     minorvers, NFSV4EXCH_USEPNFSDS, &dsp,
5812                                     nrp->nr_cred, p);
5813                                 NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
5814                                 if (error == NFSERR_MINORVERMISMATCH)
5815                                         minorvers = NFSV42_MINORVERSION;
5816                         } while (error == NFSERR_MINORVERMISMATCH &&
5817                             firsttry++ == 0);
5818                         if (error != 0)
5819                                 newnfs_disconnect(NULL, nrp);
5820                 } else {
5821                         dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
5822                             M_WAITOK | M_ZERO);
5823                         dsp->nfsclds_flags |= NFSCLDS_DS;
5824                         dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
5825                         mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5826                         mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5827                             NULL, MTX_DEF);
5828                 }
5829         }
5830         if (error == 0) {
5831                 dsp->nfsclds_sockp = nrp;
5832                 if (vers == NFS_VER4) {
5833                         NFSLOCKMNT(nmp);
5834                         retv = nfscl_getsameserver(nmp, dsp, &tdsp,
5835                             &sequenceid);
5836                         NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
5837                         if (retv == NFSDSP_USETHISSESSION &&
5838                             nfscl_dssameconn != 0) {
5839                                 NFSLOCKDS(tdsp);
5840                                 tdsp->nfsclds_flags |= NFSCLDS_SAMECONN;
5841                                 NFSUNLOCKDS(tdsp);
5842                                 NFSUNLOCKMNT(nmp);
5843                                 /*
5844                                  * If there is already a session for this
5845                                  * server, use it.
5846                                  */
5847                                 newnfs_disconnect(NULL, nrp);
5848                                 nfscl_freenfsclds(dsp);
5849                                 *dspp = tdsp;
5850                                 return (0);
5851                         }
5852                         if (retv == NFSDSP_NOTFOUND)
5853                                 sequenceid =
5854                                     dsp->nfsclds_sess.nfsess_sequenceid;
5855                         NFSUNLOCKMNT(nmp);
5856                         error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
5857                             nrp, dsp, sequenceid, 0, nrp->nr_cred, p);
5858                         NFSCL_DEBUG(3, "DS createsess=%d\n", error);
5859                 }
5860         } else {
5861                 NFSFREECRED(nrp->nr_cred);
5862                 NFSFREEMUTEX(&nrp->nr_mtx);
5863                 free(nrp->nr_nam, M_SONAME);
5864                 free(nrp, M_NFSSOCKREQ);
5865         }
5866         if (error == 0) {
5867                 NFSCL_DEBUG(3, "add DS session\n");
5868                 /*
5869                  * Put it at the end of the list. That way the list
5870                  * is ordered by when the entry was added. This matters
5871                  * since the one done first is the one that should be
5872                  * used for sequencid'ing any subsequent create sessions.
5873                  */
5874                 NFSLOCKMNT(nmp);
5875                 TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
5876                 NFSUNLOCKMNT(nmp);
5877                 *dspp = dsp;
5878         } else if (dsp != NULL) {
5879                 newnfs_disconnect(NULL, nrp);
5880                 nfscl_freenfsclds(dsp);
5881         }
5882         return (error);
5883 }
5884
5885 /*
5886  * Do the NFSv4.1 Reclaim Complete.
5887  */
5888 int
5889 nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
5890 {
5891         uint32_t *tl;
5892         struct nfsrv_descript nfsd;
5893         struct nfsrv_descript *nd = &nfsd;
5894         int error;
5895
5896         nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0,
5897             0, cred);
5898         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5899         *tl = newnfs_false;
5900         nd->nd_flag |= ND_USEGSSNAME;
5901         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5902             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5903         if (error != 0)
5904                 return (error);
5905         error = nd->nd_repstat;
5906         m_freem(nd->nd_mrep);
5907         return (error);
5908 }
5909
5910 /*
5911  * Initialize the slot tables for a session.
5912  */
5913 static void
5914 nfscl_initsessionslots(struct nfsclsession *sep)
5915 {
5916         int i;
5917
5918         for (i = 0; i < NFSV4_CBSLOTS; i++) {
5919                 if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
5920                         m_freem(sep->nfsess_cbslots[i].nfssl_reply);
5921                 NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
5922         }
5923         for (i = 0; i < 64; i++)
5924                 sep->nfsess_slotseq[i] = 0;
5925         sep->nfsess_slots = 0;
5926         sep->nfsess_badslots = 0;
5927 }
5928
5929 /*
5930  * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
5931  */
5932 int
5933 nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5934     uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
5935 {
5936         struct nfsnode *np = VTONFS(vp);
5937         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5938         struct nfscllayout *layp;
5939         struct nfscldevinfo *dip;
5940         struct nfsclflayout *rflp;
5941         struct mbuf *m, *m2;
5942         struct nfsclwritedsdorpc *drpc, *tdrpc;
5943         nfsv4stateid_t stateid;
5944         struct ucred *newcred;
5945         uint64_t lastbyte, len, off, oresid, xfer;
5946         int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo;
5947         void *lckp;
5948         uint8_t *dev;
5949         void *iovbase = NULL;
5950         size_t iovlen = 0;
5951         off_t offs = 0;
5952         ssize_t resid = 0;
5953         uint32_t op;
5954
5955         if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5956             (np->n_flag & NNOLAYOUT) != 0)
5957                 return (EIO);
5958         /* Now, get a reference cnt on the clientid for this mount. */
5959         if (nfscl_getref(nmp) == 0)
5960                 return (EIO);
5961
5962         /* Find an appropriate stateid. */
5963         newcred = NFSNEWCRED(cred);
5964         error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
5965             rwaccess, 1, newcred, p, &stateid, &lckp);
5966         if (error != 0) {
5967                 NFSFREECRED(newcred);
5968                 nfscl_relref(nmp);
5969                 return (error);
5970         }
5971         /* Search for a layout for this file. */
5972         off = uiop->uio_offset;
5973         layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
5974             np->n_fhp->nfh_len, off, rwaccess, &rflp, &recalled);
5975         if (layp == NULL || rflp == NULL) {
5976                 if (recalled != 0) {
5977                         NFSFREECRED(newcred);
5978                         if (lckp != NULL)
5979                                 nfscl_lockderef(lckp);
5980                         nfscl_relref(nmp);
5981                         return (EIO);
5982                 }
5983                 if (layp != NULL) {
5984                         nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
5985                         layp = NULL;
5986                 }
5987                 /* Try and get a Layout, if it is supported. */
5988                 if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
5989                     (np->n_flag & NWRITEOPENED) != 0)
5990                         iolaymode = NFSLAYOUTIOMODE_RW;
5991                 else
5992                         iolaymode = NFSLAYOUTIOMODE_READ;
5993                 error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
5994                     rwaccess, NULL, &stateid, off, &layp, newcred, p);
5995                 if (error != 0) {
5996                         NFSLOCKNODE(np);
5997                         np->n_flag |= NNOLAYOUT;
5998                         NFSUNLOCKNODE(np);
5999                         if (lckp != NULL)
6000                                 nfscl_lockderef(lckp);
6001                         NFSFREECRED(newcred);
6002                         if (layp != NULL)
6003                                 nfscl_rellayout(layp, 0);
6004                         nfscl_relref(nmp);
6005                         return (error);
6006                 }
6007         }
6008
6009         /*
6010          * Loop around finding a layout that works for the first part of
6011          * this I/O operation, and then call the function that actually
6012          * does the RPC.
6013          */
6014         eof = 0;
6015         len = (uint64_t)uiop->uio_resid;
6016         while (len > 0 && error == 0 && eof == 0) {
6017                 off = uiop->uio_offset;
6018                 error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
6019                 if (error == 0) {
6020                         oresid = xfer = (uint64_t)uiop->uio_resid;
6021                         if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
6022                                 xfer = rflp->nfsfl_end - rflp->nfsfl_off;
6023                         /*
6024                          * For Flex File layout with mirrored DSs, select one
6025                          * of them at random for reads. For writes and commits,
6026                          * do all mirrors.
6027                          */
6028                         m = NULL;
6029                         tdrpc = drpc = NULL;
6030                         firstmirror = 0;
6031                         mirrorcnt = 1;
6032                         if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
6033                             (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
6034                                 if (rwaccess == NFSV4OPEN_ACCESSREAD) {
6035                                         firstmirror = arc4random() % mirrorcnt;
6036                                         mirrorcnt = firstmirror + 1;
6037                                 } else {
6038                                         if (docommit == 0) {
6039                                                 /*
6040                                                  * Save values, so uiop can be
6041                                                  * rolled back upon a write
6042                                                  * error.
6043                                                  */
6044                                                 offs = uiop->uio_offset;
6045                                                 resid = uiop->uio_resid;
6046                                                 iovbase =
6047                                                     uiop->uio_iov->iov_base;
6048                                                 iovlen = uiop->uio_iov->iov_len;
6049                                                 m = nfsm_uiombuflist(uiop, len,
6050                                                     0);
6051                                                 if (m == NULL) {
6052                                                         error = EFAULT;
6053                                                         break;
6054                                                 }
6055                                         }
6056                                         tdrpc = drpc = malloc(sizeof(*drpc) *
6057                                             (mirrorcnt - 1), M_TEMP, M_WAITOK |
6058                                             M_ZERO);
6059                                 }
6060                         }
6061                         for (i = firstmirror; i < mirrorcnt && error == 0; i++){
6062                                 m2 = NULL;
6063                                 if (m != NULL && i < mirrorcnt - 1)
6064                                         m2 = m_copym(m, 0, M_COPYALL, M_WAITOK);
6065                                 else {
6066                                         m2 = m;
6067                                         m = NULL;
6068                                 }
6069                                 if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0) {
6070                                         dev = rflp->nfsfl_ffm[i].dev;
6071                                         dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6072                                             rflp->nfsfl_ffm[i].devp);
6073                                 } else {
6074                                         dev = rflp->nfsfl_dev;
6075                                         dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6076                                             rflp->nfsfl_devp);
6077                                 }
6078                                 if (dip != NULL) {
6079                                         if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
6080                                             != 0)
6081                                                 error = nfscl_dofflayoutio(vp,
6082                                                     uiop, iomode, must_commit,
6083                                                     &eof, &stateid, rwaccess,
6084                                                     dip, layp, rflp, off, xfer,
6085                                                     i, docommit, m2, tdrpc,
6086                                                     newcred, p);
6087                                         else
6088                                                 error = nfscl_doflayoutio(vp,
6089                                                     uiop, iomode, must_commit,
6090                                                     &eof, &stateid, rwaccess,
6091                                                     dip, layp, rflp, off, xfer,
6092                                                     docommit, newcred, p);
6093                                         nfscl_reldevinfo(dip);
6094                                 } else {
6095                                         if (m2 != NULL)
6096                                                 m_freem(m2);
6097                                         error = EIO;
6098                                 }
6099                                 tdrpc++;
6100                         }
6101                         if (m != NULL)
6102                                 m_freem(m);
6103                         tdrpc = drpc;
6104                         timo = hz / 50;         /* Wait for 20msec. */
6105                         if (timo < 1)
6106                                 timo = 1;
6107                         for (i = firstmirror; i < mirrorcnt - 1 &&
6108                             tdrpc != NULL; i++, tdrpc++) {
6109                                 /*
6110                                  * For the unused drpc entries, both inprog and
6111                                  * err == 0, so this loop won't break.
6112                                  */
6113                                 while (tdrpc->inprog != 0 && tdrpc->done == 0)
6114                                         tsleep(&tdrpc->tsk, PVFS, "clrpcio",
6115                                             timo);
6116                                 if (error == 0 && tdrpc->err != 0)
6117                                         error = tdrpc->err;
6118                                 if (rwaccess != NFSV4OPEN_ACCESSREAD &&
6119                                     docommit == 0 && *must_commit == 0 &&
6120                                     tdrpc->must_commit == 1)
6121                                         *must_commit = 1;
6122                         }
6123                         free(drpc, M_TEMP);
6124                         if (error == 0) {
6125                                 if (mirrorcnt > 1 && rwaccess ==
6126                                     NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6127                                         NFSLOCKCLSTATE();
6128                                         layp->nfsly_flags |= NFSLY_WRITTEN;
6129                                         NFSUNLOCKCLSTATE();
6130                                 }
6131                                 lastbyte = off + xfer - 1;
6132                                 NFSLOCKCLSTATE();
6133                                 if (lastbyte > layp->nfsly_lastbyte)
6134                                         layp->nfsly_lastbyte = lastbyte;
6135                                 NFSUNLOCKCLSTATE();
6136                         } else if (error == NFSERR_OPENMODE &&
6137                             rwaccess == NFSV4OPEN_ACCESSREAD) {
6138                                 NFSLOCKMNT(nmp);
6139                                 nmp->nm_state |= NFSSTA_OPENMODE;
6140                                 NFSUNLOCKMNT(nmp);
6141                         } else if ((error == NFSERR_NOSPC ||
6142                             error == NFSERR_IO || error == NFSERR_NXIO) &&
6143                             nmp->nm_minorvers == NFSV42_MINORVERSION) {
6144                                 if (docommit != 0)
6145                                         op = NFSV4OP_COMMIT;
6146                                 else if (rwaccess == NFSV4OPEN_ACCESSREAD)
6147                                         op = NFSV4OP_READ;
6148                                 else
6149                                         op = NFSV4OP_WRITE;
6150                                 nfsrpc_layouterror(nmp, np->n_fhp->nfh_fh,
6151                                     np->n_fhp->nfh_len, off, xfer,
6152                                     &layp->nfsly_stateid, newcred, p, error, op,
6153                                     dip->nfsdi_deviceid);
6154                                 error = EIO;
6155                         } else
6156                                 error = EIO;
6157                         if (error == 0)
6158                                 len -= (oresid - (uint64_t)uiop->uio_resid);
6159                         else if (mirrorcnt > 1 && rwaccess ==
6160                             NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6161                                 /*
6162                                  * In case the rpc gets retried, roll the
6163                                  * uio fields changed by nfsm_uiombuflist()
6164                                  * back.
6165                                  */
6166                                 uiop->uio_offset = offs;
6167                                 uiop->uio_resid = resid;
6168                                 uiop->uio_iov->iov_base = iovbase;
6169                                 uiop->uio_iov->iov_len = iovlen;
6170                         }
6171                 }
6172         }
6173         if (lckp != NULL)
6174                 nfscl_lockderef(lckp);
6175         NFSFREECRED(newcred);
6176         nfscl_rellayout(layp, 0);
6177         nfscl_relref(nmp);
6178         return (error);
6179 }
6180
6181 /*
6182  * Find a file layout that will handle the first bytes of the requested
6183  * range and return the information from it needed to the I/O operation.
6184  */
6185 int
6186 nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
6187     struct nfsclflayout **retflpp)
6188 {
6189         struct nfsclflayout *flp, *nflp, *rflp;
6190         uint32_t rw;
6191
6192         rflp = NULL;
6193         rw = rwaccess;
6194         /* For reading, do the Read list first and then the Write list. */
6195         do {
6196                 if (rw == NFSV4OPEN_ACCESSREAD)
6197                         flp = LIST_FIRST(&lyp->nfsly_flayread);
6198                 else
6199                         flp = LIST_FIRST(&lyp->nfsly_flayrw);
6200                 while (flp != NULL) {
6201                         nflp = LIST_NEXT(flp, nfsfl_list);
6202                         if (flp->nfsfl_off > off)
6203                                 break;
6204                         if (flp->nfsfl_end > off &&
6205                             (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
6206                                 rflp = flp;
6207                         flp = nflp;
6208                 }
6209                 if (rw == NFSV4OPEN_ACCESSREAD)
6210                         rw = NFSV4OPEN_ACCESSWRITE;
6211                 else
6212                         rw = 0;
6213         } while (rw != 0);
6214         if (rflp != NULL) {
6215                 /* This one covers the most bytes starting at off. */
6216                 *retflpp = rflp;
6217                 return (0);
6218         }
6219         return (EIO);
6220 }
6221
6222 /*
6223  * Do I/O using an NFSv4.1 or NFSv4.2 file layout.
6224  */
6225 static int
6226 nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6227     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6228     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6229     uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
6230 {
6231         uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
6232         int commit_thru_mds, error, stripe_index, stripe_pos, minorvers;
6233         struct nfsnode *np;
6234         struct nfsfh *fhp;
6235         struct nfsclds **dspp;
6236
6237         np = VTONFS(vp);
6238         rel_off = off - flp->nfsfl_patoff;
6239         stripe_unit_size = flp->nfsfl_util & NFSFLAYUTIL_STRIPE_MASK;
6240         stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
6241             dp->nfsdi_stripecnt;
6242         transfer = stripe_unit_size - (rel_off % stripe_unit_size);
6243         error = 0;
6244
6245         /* Loop around, doing I/O for each stripe unit. */
6246         while (len > 0 && error == 0) {
6247                 stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
6248                 dspp = nfsfldi_addr(dp, stripe_index);
6249                 if (((*dspp)->nfsclds_flags & NFSCLDS_MINORV2) != 0)
6250                         minorvers = NFSV42_MINORVERSION;
6251                 else
6252                         minorvers = NFSV41_MINORVERSION;
6253                 if (len > transfer && docommit == 0)
6254                         xfer = transfer;
6255                 else
6256                         xfer = len;
6257                 if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
6258                         /* Dense layout. */
6259                         if (stripe_pos >= flp->nfsfl_fhcnt)
6260                                 return (EIO);
6261                         fhp = flp->nfsfl_fh[stripe_pos];
6262                         io_off = (rel_off / (stripe_unit_size *
6263                             dp->nfsdi_stripecnt)) * stripe_unit_size +
6264                             rel_off % stripe_unit_size;
6265                 } else {
6266                         /* Sparse layout. */
6267                         if (flp->nfsfl_fhcnt > 1) {
6268                                 if (stripe_index >= flp->nfsfl_fhcnt)
6269                                         return (EIO);
6270                                 fhp = flp->nfsfl_fh[stripe_index];
6271                         } else if (flp->nfsfl_fhcnt == 1)
6272                                 fhp = flp->nfsfl_fh[0];
6273                         else
6274                                 fhp = np->n_fhp;
6275                         io_off = off;
6276                 }
6277                 if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
6278                         commit_thru_mds = 1;
6279                         if (docommit != 0)
6280                                 error = EIO;
6281                 } else {
6282                         commit_thru_mds = 0;
6283                         NFSLOCKNODE(np);
6284                         np->n_flag |= NDSCOMMIT;
6285                         NFSUNLOCKNODE(np);
6286                 }
6287                 if (docommit != 0) {
6288                         if (error == 0)
6289                                 error = nfsrpc_commitds(vp, io_off, xfer,
6290                                     *dspp, fhp, NFS_VER4, minorvers, cred, p);
6291                         if (error == 0) {
6292                                 /*
6293                                  * Set both eof and uio_resid = 0 to end any
6294                                  * loops.
6295                                  */
6296                                 *eofp = 1;
6297                                 uiop->uio_resid = 0;
6298                         } else {
6299                                 NFSLOCKNODE(np);
6300                                 np->n_flag &= ~NDSCOMMIT;
6301                                 NFSUNLOCKNODE(np);
6302                         }
6303                 } else if (rwflag == NFSV4OPEN_ACCESSREAD)
6304                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6305                             io_off, xfer, fhp, 0, NFS_VER4, minorvers, cred, p);
6306                 else {
6307                         error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
6308                             stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
6309                             0, NFS_VER4, minorvers, cred, p);
6310                         if (error == 0) {
6311                                 NFSLOCKCLSTATE();
6312                                 lyp->nfsly_flags |= NFSLY_WRITTEN;
6313                                 NFSUNLOCKCLSTATE();
6314                         }
6315                 }
6316                 if (error == 0) {
6317                         transfer = stripe_unit_size;
6318                         stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
6319                         len -= xfer;
6320                         off += xfer;
6321                 }
6322         }
6323         return (error);
6324 }
6325
6326 /*
6327  * Do I/O using an NFSv4.1 flex file layout.
6328  */
6329 static int
6330 nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6331     int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6332     struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6333     uint64_t len, int mirror, int docommit, struct mbuf *mp,
6334     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6335 {
6336         uint64_t xfer;
6337         int error;
6338         struct nfsnode *np;
6339         struct nfsfh *fhp;
6340         struct nfsclds **dspp;
6341         struct ucred *tcred;
6342         struct mbuf *m, *m2;
6343         uint32_t copylen;
6344
6345         np = VTONFS(vp);
6346         error = 0;
6347         NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
6348             (uintmax_t)len);
6349         /* Loop around, doing I/O for each stripe unit. */
6350         while (len > 0 && error == 0) {
6351                 dspp = nfsfldi_addr(dp, 0);
6352                 fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
6353                 stateidp = &flp->nfsfl_ffm[mirror].st;
6354                 NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
6355                     mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
6356                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
6357                         tcred = NFSNEWCRED(cred);
6358                         tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
6359                         tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
6360                         tcred->cr_ngroups = 1;
6361                 } else
6362                         tcred = cred;
6363                 if (rwflag == NFSV4OPEN_ACCESSREAD)
6364                         copylen = dp->nfsdi_rsize;
6365                 else {
6366                         copylen = dp->nfsdi_wsize;
6367                         if (len > copylen && mp != NULL) {
6368                                 /*
6369                                  * When a mirrored configuration needs to do
6370                                  * multiple writes to each mirror, all writes
6371                                  * except the last one must be a multiple of
6372                                  * 4 bytes.  This is required so that the XDR
6373                                  * does not need padding.
6374                                  * If possible, clip the size to an exact
6375                                  * multiple of the mbuf length, so that the
6376                                  * split will be on an mbuf boundary.
6377                                  */
6378                                 copylen &= 0xfffffffc;
6379                                 if (copylen > mp->m_len)
6380                                         copylen = copylen / mp->m_len *
6381                                             mp->m_len;
6382                         }
6383                 }
6384                 NFSLOCKNODE(np);
6385                 np->n_flag |= NDSCOMMIT;
6386                 NFSUNLOCKNODE(np);
6387                 if (len > copylen && docommit == 0)
6388                         xfer = copylen;
6389                 else
6390                         xfer = len;
6391                 if (docommit != 0) {
6392                         if (error == 0) {
6393                                 /*
6394                                  * Do last mirrored DS commit with this thread.
6395                                  */
6396                                 if (mirror < flp->nfsfl_mirrorcnt - 1)
6397                                         error = nfsio_commitds(vp, off, xfer,
6398                                             *dspp, fhp, dp->nfsdi_vers,
6399                                             dp->nfsdi_minorvers, drpc, tcred,
6400                                             p);
6401                                 else
6402                                         error = nfsrpc_commitds(vp, off, xfer,
6403                                             *dspp, fhp, dp->nfsdi_vers,
6404                                             dp->nfsdi_minorvers, tcred, p);
6405                                 NFSCL_DEBUG(4, "commitds=%d\n", error);
6406                                 if (error != 0 && error != EACCES && error !=
6407                                     ESTALE) {
6408                                         NFSCL_DEBUG(4,
6409                                             "DS layreterr for commit\n");
6410                                         nfscl_dserr(NFSV4OP_COMMIT, error, dp,
6411                                             lyp, *dspp);
6412                                 }
6413                         }
6414                         NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error);
6415                         if (error == 0) {
6416                                 /*
6417                                  * Set both eof and uio_resid = 0 to end any
6418                                  * loops.
6419                                  */
6420                                 *eofp = 1;
6421                                 uiop->uio_resid = 0;
6422                         } else {
6423                                 NFSLOCKNODE(np);
6424                                 np->n_flag &= ~NDSCOMMIT;
6425                                 NFSUNLOCKNODE(np);
6426                         }
6427                 } else if (rwflag == NFSV4OPEN_ACCESSREAD) {
6428                         error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6429                             off, xfer, fhp, 1, dp->nfsdi_vers,
6430                             dp->nfsdi_minorvers, tcred, p);
6431                         NFSCL_DEBUG(4, "readds=%d\n", error);
6432                         if (error != 0 && error != EACCES && error != ESTALE) {
6433                                 NFSCL_DEBUG(4, "DS layreterr for read\n");
6434                                 nfscl_dserr(NFSV4OP_READ, error, dp, lyp,
6435                                     *dspp);
6436                         }
6437                 } else {
6438                         if (flp->nfsfl_mirrorcnt == 1) {
6439                                 error = nfsrpc_writeds(vp, uiop, iomode,
6440                                     must_commit, stateidp, *dspp, off, xfer,
6441                                     fhp, 0, 1, dp->nfsdi_vers,
6442                                     dp->nfsdi_minorvers, tcred, p);
6443                                 if (error == 0) {
6444                                         NFSLOCKCLSTATE();
6445                                         lyp->nfsly_flags |= NFSLY_WRITTEN;
6446                                         NFSUNLOCKCLSTATE();
6447                                 }
6448                         } else {
6449                                 m = mp;
6450                                 if (xfer < len) {
6451                                         /* The mbuf list must be split. */
6452                                         m2 = nfsm_split(mp, xfer);
6453                                         if (m2 != NULL)
6454                                                 mp = m2;
6455                                         else {
6456                                                 m_freem(mp);
6457                                                 error = EIO;
6458                                         }
6459                                 }
6460                                 NFSCL_DEBUG(4, "mcopy len=%jd xfer=%jd\n",
6461                                     (uintmax_t)len, (uintmax_t)xfer);
6462                                 /*
6463                                  * Do last write to a mirrored DS with this
6464                                  * thread.
6465                                  */
6466                                 if (error == 0) {
6467                                         if (mirror < flp->nfsfl_mirrorcnt - 1)
6468                                                 error = nfsio_writedsmir(vp,
6469                                                     iomode, must_commit,
6470                                                     stateidp, *dspp, off,
6471                                                     xfer, fhp, m,
6472                                                     dp->nfsdi_vers,
6473                                                     dp->nfsdi_minorvers, drpc,
6474                                                     tcred, p);
6475                                         else
6476                                                 error = nfsrpc_writedsmir(vp,
6477                                                     iomode, must_commit,
6478                                                     stateidp, *dspp, off,
6479                                                     xfer, fhp, m,
6480                                                     dp->nfsdi_vers,
6481                                                     dp->nfsdi_minorvers, tcred,
6482                                                     p);
6483                                 }
6484                                 NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error);
6485                                 if (error != 0 && error != EACCES && error !=
6486                                     ESTALE) {
6487                                         NFSCL_DEBUG(4,
6488                                             "DS layreterr for write\n");
6489                                         nfscl_dserr(NFSV4OP_WRITE, error, dp,
6490                                             lyp, *dspp);
6491                                 }
6492                         }
6493                 }
6494                 NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
6495                 if (error == 0) {
6496                         len -= xfer;
6497                         off += xfer;
6498                 }
6499                 if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
6500                         NFSFREECRED(tcred);
6501         }
6502         NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
6503         return (error);
6504 }
6505
6506 /*
6507  * The actual read RPC done to a DS.
6508  */
6509 static int
6510 nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
6511     struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
6512     int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
6513 {
6514         uint32_t *tl;
6515         int attrflag, error, retlen;
6516         struct nfsrv_descript nfsd;
6517         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6518         struct nfsrv_descript *nd = &nfsd;
6519         struct nfssockreq *nrp;
6520         struct nfsvattr na;
6521
6522         nd->nd_mrep = NULL;
6523         if (vers == 0 || vers == NFS_VER4) {
6524                 nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
6525                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6526                     NULL);
6527                 vers = NFS_VER4;
6528                 NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
6529                 if (flex != 0)
6530                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6531                 else
6532                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6533         } else {
6534                 nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
6535                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6536                     NULL);
6537                 NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READ]);
6538                 NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READDS]);
6539                 NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
6540         }
6541         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
6542         txdr_hyper(io_off, tl);
6543         *(tl + 2) = txdr_unsigned(len);
6544         nrp = dsp->nfsclds_sockp;
6545         NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
6546         if (nrp == NULL)
6547                 /* If NULL, use the MDS socket. */
6548                 nrp = &nmp->nm_sockreq;
6549         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6550             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6551         NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
6552             error);
6553         if (error != 0)
6554                 return (error);
6555         if (vers == NFS_VER3) {
6556                 error = nfscl_postop_attr(nd, &na, &attrflag, NULL);
6557                 NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
6558                 if (error != 0)
6559                         goto nfsmout;
6560         }
6561         if (nd->nd_repstat != 0) {
6562                 error = nd->nd_repstat;
6563                 goto nfsmout;
6564         }
6565         if (vers == NFS_VER3) {
6566                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6567                 *eofp = fxdr_unsigned(int, *(tl + 1));
6568         } else {
6569                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6570                 *eofp = fxdr_unsigned(int, *tl);
6571         }
6572         NFSM_STRSIZ(retlen, len);
6573         NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
6574         error = nfsm_mbufuio(nd, uiop, retlen);
6575 nfsmout:
6576         if (nd->nd_mrep != NULL)
6577                 m_freem(nd->nd_mrep);
6578         return (error);
6579 }
6580
6581 /*
6582  * The actual write RPC done to a DS.
6583  */
6584 static int
6585 nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6586     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6587     struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
6588     struct ucred *cred, NFSPROC_T *p)
6589 {
6590         uint32_t *tl;
6591         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6592         int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
6593         int32_t backup;
6594         struct nfsrv_descript nfsd;
6595         struct nfsrv_descript *nd = &nfsd;
6596         struct nfssockreq *nrp;
6597         struct nfsvattr na;
6598
6599         KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
6600         nd->nd_mrep = NULL;
6601         if (vers == 0 || vers == NFS_VER4) {
6602                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6603                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6604                     NULL);
6605                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
6606                 vers = NFS_VER4;
6607                 if (flex != 0)
6608                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6609                 else
6610                         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6611                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6612         } else {
6613                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6614                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6615                     NULL);
6616                 NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
6617                 NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
6618                 NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
6619                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6620         }
6621         txdr_hyper(io_off, tl);
6622         tl += 2;
6623         if (vers == NFS_VER3)
6624                 *tl++ = txdr_unsigned(len);
6625         *tl++ = txdr_unsigned(*iomode);
6626         *tl = txdr_unsigned(len);
6627         error = nfsm_uiombuf(nd, uiop, len);
6628         if (error != 0) {
6629                 m_freem(nd->nd_mreq);
6630                 return (error);
6631         }
6632         nrp = dsp->nfsclds_sockp;
6633         if (nrp == NULL)
6634                 /* If NULL, use the MDS socket. */
6635                 nrp = &nmp->nm_sockreq;
6636         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6637             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6638         NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
6639             nd->nd_repstat);
6640         if (error != 0)
6641                 return (error);
6642         if (nd->nd_repstat != 0) {
6643                 /*
6644                  * In case the rpc gets retried, roll
6645                  * the uio fileds changed by nfsm_uiombuf()
6646                  * back.
6647                  */
6648                 uiop->uio_offset -= len;
6649                 uiop->uio_resid += len;
6650                 uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - len;
6651                 uiop->uio_iov->iov_len += len;
6652                 error = nd->nd_repstat;
6653         } else {
6654                 if (vers == NFS_VER3) {
6655                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6656                             NULL);
6657                         NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
6658                         if (error != 0)
6659                                 goto nfsmout;
6660                 }
6661                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6662                 rlen = fxdr_unsigned(int, *tl++);
6663                 NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
6664                 if (rlen == 0) {
6665                         error = NFSERR_IO;
6666                         goto nfsmout;
6667                 } else if (rlen < len) {
6668                         backup = len - rlen;
6669                         uiop->uio_iov->iov_base =
6670                             (char *)uiop->uio_iov->iov_base - backup;
6671                         uiop->uio_iov->iov_len += backup;
6672                         uiop->uio_offset -= backup;
6673                         uiop->uio_resid += backup;
6674                         len = rlen;
6675                 }
6676                 commit = fxdr_unsigned(int, *tl++);
6677
6678                 /*
6679                  * Return the lowest commitment level
6680                  * obtained by any of the RPCs.
6681                  */
6682                 if (committed == NFSWRITE_FILESYNC)
6683                         committed = commit;
6684                 else if (committed == NFSWRITE_DATASYNC &&
6685                     commit == NFSWRITE_UNSTABLE)
6686                         committed = commit;
6687                 if (commit_thru_mds != 0) {
6688                         NFSLOCKMNT(nmp);
6689                         if (!NFSHASWRITEVERF(nmp)) {
6690                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6691                                 NFSSETWRITEVERF(nmp);
6692                         } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF) &&
6693                             *must_commit != 2) {
6694                                 *must_commit = 1;
6695                                 NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
6696                         }
6697                         NFSUNLOCKMNT(nmp);
6698                 } else {
6699                         NFSLOCKDS(dsp);
6700                         if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6701                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6702                                 dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6703                         } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF) &&
6704                             *must_commit != 2) {
6705                                 *must_commit = 1;
6706                                 NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6707                         }
6708                         NFSUNLOCKDS(dsp);
6709                 }
6710         }
6711 nfsmout:
6712         if (nd->nd_mrep != NULL)
6713                 m_freem(nd->nd_mrep);
6714         *iomode = committed;
6715         if (nd->nd_repstat != 0 && error == 0)
6716                 error = nd->nd_repstat;
6717         return (error);
6718 }
6719
6720 /*
6721  * The actual write RPC done to a DS.
6722  * This variant is called from a separate kernel process for mirrors.
6723  * Any short write is considered an IO error.
6724  */
6725 static int
6726 nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6727     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6728     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6729     struct ucred *cred, NFSPROC_T *p)
6730 {
6731         uint32_t *tl;
6732         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6733         int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
6734         struct nfsrv_descript nfsd;
6735         struct nfsrv_descript *nd = &nfsd;
6736         struct nfssockreq *nrp;
6737         struct nfsvattr na;
6738
6739         nd->nd_mrep = NULL;
6740         if (vers == 0 || vers == NFS_VER4) {
6741                 nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6742                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6743                     NULL);
6744                 vers = NFS_VER4;
6745                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
6746                     minorvers);
6747                 nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6748                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6749         } else {
6750                 nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6751                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6752                     NULL);
6753                 NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
6754                 NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
6755                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
6756                 NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6757         }
6758         txdr_hyper(io_off, tl);
6759         tl += 2;
6760         if (vers == NFS_VER3)
6761                 *tl++ = txdr_unsigned(len);
6762         *tl++ = txdr_unsigned(*iomode);
6763         *tl = txdr_unsigned(len);
6764         if (len > 0) {
6765                 /* Put data in mbuf chain. */
6766                 nd->nd_mb->m_next = m;
6767         }
6768         nrp = dsp->nfsclds_sockp;
6769         if (nrp == NULL)
6770                 /* If NULL, use the MDS socket. */
6771                 nrp = &nmp->nm_sockreq;
6772         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6773             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6774         NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
6775             nd->nd_repstat);
6776         if (error != 0)
6777                 return (error);
6778         if (nd->nd_repstat != 0)
6779                 error = nd->nd_repstat;
6780         else {
6781                 if (vers == NFS_VER3) {
6782                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6783                             NULL);
6784                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
6785                             error);
6786                         if (error != 0)
6787                                 goto nfsmout;
6788                 }
6789                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6790                 rlen = fxdr_unsigned(int, *tl++);
6791                 NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
6792                     rlen);
6793                 if (rlen != len) {
6794                         error = NFSERR_IO;
6795                         NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
6796                             len, rlen);
6797                         goto nfsmout;
6798                 }
6799                 commit = fxdr_unsigned(int, *tl++);
6800
6801                 /*
6802                  * Return the lowest commitment level
6803                  * obtained by any of the RPCs.
6804                  */
6805                 if (committed == NFSWRITE_FILESYNC)
6806                         committed = commit;
6807                 else if (committed == NFSWRITE_DATASYNC &&
6808                     commit == NFSWRITE_UNSTABLE)
6809                         committed = commit;
6810                 NFSLOCKDS(dsp);
6811                 if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
6812                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6813                         dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
6814                 } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF) &&
6815                     *must_commit != 2) {
6816                         *must_commit = 1;
6817                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
6818                 }
6819                 NFSUNLOCKDS(dsp);
6820         }
6821 nfsmout:
6822         if (nd->nd_mrep != NULL)
6823                 m_freem(nd->nd_mrep);
6824         *iomode = committed;
6825         if (nd->nd_repstat != 0 && error == 0)
6826                 error = nd->nd_repstat;
6827         return (error);
6828 }
6829
6830 /*
6831  * Start up the thread that will execute nfsrpc_writedsmir().
6832  */
6833 static void
6834 start_writedsmir(void *arg, int pending)
6835 {
6836         struct nfsclwritedsdorpc *drpc;
6837
6838         drpc = (struct nfsclwritedsdorpc *)arg;
6839         drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode,
6840             &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len,
6841             drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred,
6842             drpc->p);
6843         drpc->done = 1;
6844         crfree(drpc->cred);
6845         NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err);
6846 }
6847
6848 /*
6849  * Set up the write DS mirror call for the pNFS I/O thread.
6850  */
6851 static int
6852 nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit,
6853     nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len,
6854     struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
6855     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6856 {
6857         int error, ret;
6858
6859         error = 0;
6860         drpc->done = 0;
6861         drpc->vp = vp;
6862         drpc->iomode = *iomode;
6863         drpc->must_commit = *must_commit;
6864         drpc->stateidp = stateidp;
6865         drpc->dsp = dsp;
6866         drpc->off = off;
6867         drpc->len = len;
6868         drpc->fhp = fhp;
6869         drpc->m = m;
6870         drpc->vers = vers;
6871         drpc->minorvers = minorvers;
6872         drpc->cred = crhold(cred);
6873         drpc->p = p;
6874         drpc->inprog = 0;
6875         ret = EIO;
6876         if (nfs_pnfsiothreads != 0) {
6877                 ret = nfs_pnfsio(start_writedsmir, drpc);
6878                 NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret);
6879         }
6880         if (ret != 0) {
6881                 error = nfsrpc_writedsmir(vp, iomode, &drpc->must_commit,
6882                     stateidp, dsp, off, len, fhp, m, vers, minorvers, cred, p);
6883                 crfree(drpc->cred);
6884         }
6885         NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error);
6886         return (error);
6887 }
6888
6889 /*
6890  * Free up the nfsclds structure.
6891  */
6892 void
6893 nfscl_freenfsclds(struct nfsclds *dsp)
6894 {
6895         int i;
6896
6897         if (dsp == NULL)
6898                 return;
6899         if (dsp->nfsclds_sockp != NULL) {
6900                 NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
6901                 NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
6902                 free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
6903                 free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
6904         }
6905         NFSFREEMUTEX(&dsp->nfsclds_mtx);
6906         NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
6907         for (i = 0; i < NFSV4_CBSLOTS; i++) {
6908                 if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
6909                         m_freem(
6910                             dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
6911         }
6912         free(dsp, M_NFSCLDS);
6913 }
6914
6915 static enum nfsclds_state
6916 nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
6917     struct nfsclds **retdspp, uint32_t *sequencep)
6918 {
6919         struct nfsclds *dsp;
6920         int fndseq;
6921
6922         /*
6923          * Search the list of nfsclds structures for one with the same
6924          * server.
6925          */
6926         fndseq = 0;
6927         TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
6928                 if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
6929                     dsp->nfsclds_servownlen != 0 &&
6930                     !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
6931                     dsp->nfsclds_servownlen) &&
6932                     dsp->nfsclds_sess.nfsess_defunct == 0) {
6933                         NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
6934                             TAILQ_FIRST(&nmp->nm_sess), dsp,
6935                             dsp->nfsclds_flags);
6936                         if (fndseq == 0) {
6937                                 /* Get sequenceid# from first entry. */
6938                                 *sequencep =
6939                                     dsp->nfsclds_sess.nfsess_sequenceid;
6940                                 fndseq = 1;
6941                         }
6942                         /* Server major id matches. */
6943                         if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
6944                                 *retdspp = dsp;
6945                                 return (NFSDSP_USETHISSESSION);
6946                         }
6947                 }
6948         }
6949         if (fndseq != 0)
6950                 return (NFSDSP_SEQTHISSESSION);
6951         return (NFSDSP_NOTFOUND);
6952 }
6953
6954 /*
6955  * NFS commit rpc to a NFSv4.1 DS.
6956  */
6957 static int
6958 nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
6959     struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
6960     NFSPROC_T *p)
6961 {
6962         uint32_t *tl;
6963         struct nfsrv_descript nfsd, *nd = &nfsd;
6964         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6965         struct nfssockreq *nrp;
6966         struct nfsvattr na;
6967         int attrflag, error;
6968
6969         nd->nd_mrep = NULL;
6970         if (vers == 0 || vers == NFS_VER4) {
6971                 nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,
6972                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6973                     NULL);
6974                 vers = NFS_VER4;
6975         } else {
6976                 nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh,
6977                     fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6978                     NULL);
6979                 NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMIT]);
6980                 NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMITDS]);
6981         }
6982         NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers,
6983             minorvers);
6984         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
6985         txdr_hyper(offset, tl);
6986         tl += 2;
6987         *tl = txdr_unsigned(cnt);
6988         nrp = dsp->nfsclds_sockp;
6989         if (nrp == NULL)
6990                 /* If NULL, use the MDS socket. */
6991                 nrp = &nmp->nm_sockreq;
6992         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6993             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6994         NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error,
6995             nd->nd_repstat);
6996         if (error != 0)
6997                 return (error);
6998         if (nd->nd_repstat == 0) {
6999                 if (vers == NFS_VER3) {
7000                         error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
7001                             NULL);
7002                         NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error);
7003                         if (error != 0)
7004                                 goto nfsmout;
7005                 }
7006                 NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
7007                 NFSLOCKDS(dsp);
7008                 if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
7009                         NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7010                         error = NFSERR_STALEWRITEVERF;
7011                 }
7012                 NFSUNLOCKDS(dsp);
7013         }
7014 nfsmout:
7015         if (error == 0 && nd->nd_repstat != 0)
7016                 error = nd->nd_repstat;
7017         m_freem(nd->nd_mrep);
7018         return (error);
7019 }
7020
7021 /*
7022  * Start up the thread that will execute nfsrpc_commitds().
7023  */
7024 static void
7025 start_commitds(void *arg, int pending)
7026 {
7027         struct nfsclwritedsdorpc *drpc;
7028
7029         drpc = (struct nfsclwritedsdorpc *)arg;
7030         drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len,
7031             drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred,
7032             drpc->p);
7033         drpc->done = 1;
7034         crfree(drpc->cred);
7035         NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err);
7036 }
7037
7038 /*
7039  * Set up the commit DS mirror call for the pNFS I/O thread.
7040  */
7041 static int
7042 nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
7043     struct nfsfh *fhp, int vers, int minorvers,
7044     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7045 {
7046         int error, ret;
7047
7048         error = 0;
7049         drpc->done = 0;
7050         drpc->vp = vp;
7051         drpc->off = offset;
7052         drpc->len = cnt;
7053         drpc->dsp = dsp;
7054         drpc->fhp = fhp;
7055         drpc->vers = vers;
7056         drpc->minorvers = minorvers;
7057         drpc->cred = crhold(cred);
7058         drpc->p = p;
7059         drpc->inprog = 0;
7060         ret = EIO;
7061         if (nfs_pnfsiothreads != 0) {
7062                 ret = nfs_pnfsio(start_commitds, drpc);
7063                 NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret);
7064         }
7065         if (ret != 0) {
7066                 error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers,
7067                     minorvers, cred, p);
7068                 crfree(drpc->cred);
7069         }
7070         NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error);
7071         return (error);
7072 }
7073
7074 /*
7075  * NFS Advise rpc
7076  */
7077 int
7078 nfsrpc_advise(vnode_t vp, off_t offset, uint64_t cnt, int advise,
7079     struct ucred *cred, NFSPROC_T *p)
7080 {
7081         u_int32_t *tl;
7082         struct nfsrv_descript nfsd, *nd = &nfsd;
7083         nfsattrbit_t hints;
7084         int error;
7085
7086         NFSZERO_ATTRBIT(&hints);
7087         if (advise == POSIX_FADV_WILLNEED)
7088                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7089         else if (advise == POSIX_FADV_DONTNEED)
7090                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7091         else
7092                 return (0);
7093         NFSCL_REQSTART(nd, NFSPROC_IOADVISE, vp, cred);
7094         nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7095         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
7096         txdr_hyper(offset, tl);
7097         tl += 2;
7098         txdr_hyper(cnt, tl);
7099         nfsrv_putattrbit(nd, &hints);
7100         error = nfscl_request(nd, vp, p, cred, NULL);
7101         if (error != 0)
7102                 return (error);
7103         if (nd->nd_repstat != 0)
7104                 error = nd->nd_repstat;
7105         m_freem(nd->nd_mrep);
7106         return (error);
7107 }
7108
7109 #ifdef notyet
7110 /*
7111  * NFS advise rpc to a NFSv4.2 DS.
7112  */
7113 static int
7114 nfsrpc_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7115     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7116     struct ucred *cred, NFSPROC_T *p)
7117 {
7118         uint32_t *tl;
7119         struct nfsrv_descript nfsd, *nd = &nfsd;
7120         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7121         struct nfssockreq *nrp;
7122         nfsattrbit_t hints;
7123         int error;
7124
7125         /* For NFS DSs prior to NFSv4.2, just return OK. */
7126         if (vers == NFS_VER3 || minorversion < NFSV42_MINORVERSION)
7127                 return (0);
7128         NFSZERO_ATTRBIT(&hints);
7129         if (advise == POSIX_FADV_WILLNEED)
7130                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7131         else if (advise == POSIX_FADV_DONTNEED)
7132                 NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7133         else
7134                 return (0);
7135         nd->nd_mrep = NULL;
7136         nfscl_reqstart(nd, NFSPROC_IOADVISEDS, nmp, fhp->nfh_fh,
7137             fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers, NULL);
7138         vers = NFS_VER4;
7139         NFSCL_DEBUG(4, "nfsrpc_adviseds: vers=%d minvers=%d\n", vers,
7140             minorvers);
7141         nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7142         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
7143         txdr_hyper(offset, tl);
7144         tl += 2;
7145         *tl = txdr_unsigned(cnt);
7146         nfsrv_putattrbit(nd, &hints);
7147         nrp = dsp->nfsclds_sockp;
7148         if (nrp == NULL)
7149                 /* If NULL, use the MDS socket. */
7150                 nrp = &nmp->nm_sockreq;
7151         error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7152             NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7153         NFSCL_DEBUG(4, "nfsrpc_adviseds: err=%d stat=%d\n", error,
7154             nd->nd_repstat);
7155         if (error != 0)
7156                 return (error);
7157         if (nd->nd_repstat != 0)
7158                 error = nd->nd_repstat;
7159         m_freem(nd->nd_mrep);
7160         return (error);
7161 }
7162
7163 /*
7164  * Start up the thread that will execute nfsrpc_commitds().
7165  */
7166 static void
7167 start_adviseds(void *arg, int pending)
7168 {
7169         struct nfsclwritedsdorpc *drpc;
7170
7171         drpc = (struct nfsclwritedsdorpc *)arg;
7172         drpc->err = nfsrpc_adviseds(drpc->vp, drpc->off, drpc->len,
7173             drpc->advise, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers,
7174             drpc->cred, drpc->p);
7175         drpc->done = 1;
7176         crfree(drpc->cred);
7177         NFSCL_DEBUG(4, "start_adviseds: err=%d\n", drpc->err);
7178 }
7179
7180 /*
7181  * Set up the advise DS mirror call for the pNFS I/O thread.
7182  */
7183 static int
7184 nfsio_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7185     struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7186     struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7187 {
7188         int error, ret;
7189
7190         error = 0;
7191         drpc->done = 0;
7192         drpc->vp = vp;
7193         drpc->off = offset;
7194         drpc->len = cnt;
7195         drpc->advise = advise;
7196         drpc->dsp = dsp;
7197         drpc->fhp = fhp;
7198         drpc->vers = vers;
7199         drpc->minorvers = minorvers;
7200         drpc->cred = crhold(cred);
7201         drpc->p = p;
7202         drpc->inprog = 0;
7203         ret = EIO;
7204         if (nfs_pnfsiothreads != 0) {
7205                 ret = nfs_pnfsio(start_adviseds, drpc);
7206                 NFSCL_DEBUG(4, "nfsio_adviseds: nfs_pnfsio=%d\n", ret);
7207         }
7208         if (ret != 0) {
7209                 error = nfsrpc_adviseds(vp, offset, cnt, advise, dsp, fhp, vers,
7210                     minorvers, cred, p);
7211                 crfree(drpc->cred);
7212         }
7213         NFSCL_DEBUG(4, "nfsio_adviseds: error=%d\n", error);
7214         return (error);
7215 }
7216 #endif  /* notyet */
7217
7218 /*
7219  * Do the Allocate operation, retrying for recovery.
7220  */
7221 int
7222 nfsrpc_allocate(vnode_t vp, off_t off, off_t len, struct nfsvattr *nap,
7223     int *attrflagp, struct ucred *cred, NFSPROC_T *p, void *stuff)
7224 {
7225         int error, expireret = 0, retrycnt, nostateid;
7226         uint32_t clidrev = 0;
7227         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7228         struct nfsfh *nfhp = NULL;
7229         nfsv4stateid_t stateid;
7230         off_t tmp_off;
7231         void *lckp;
7232
7233         if (len < 0)
7234                 return (EINVAL);
7235         if (len == 0)
7236                 return (0);
7237         tmp_off = off + len;
7238         NFSLOCKMNT(nmp);
7239         if (tmp_off > nmp->nm_maxfilesize || tmp_off < off) {
7240                 NFSUNLOCKMNT(nmp);
7241                 return (EFBIG);
7242         }
7243         if (nmp->nm_clp != NULL)
7244                 clidrev = nmp->nm_clp->nfsc_clientidrev;
7245         NFSUNLOCKMNT(nmp);
7246         nfhp = VTONFS(vp)->n_fhp;
7247         retrycnt = 0;
7248         do {
7249                 lckp = NULL;
7250                 nostateid = 0;
7251                 nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
7252                     NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
7253                 if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
7254                     stateid.other[2] == 0) {
7255                         nostateid = 1;
7256                         NFSCL_DEBUG(1, "stateid0 in allocate\n");
7257                 }
7258
7259                 /*
7260                  * Not finding a stateid should probably never happen,
7261                  * but just return an error for this case.
7262                  */
7263                 if (nostateid != 0)
7264                         error = EIO;
7265                 else
7266                         error = nfsrpc_allocaterpc(vp, off, len, &stateid,
7267                             nap, attrflagp, cred, p, stuff);
7268                 if (error == NFSERR_STALESTATEID)
7269                         nfscl_initiate_recovery(nmp->nm_clp);
7270                 if (lckp != NULL)
7271                         nfscl_lockderef(lckp);
7272                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
7273                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
7274                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
7275                         (void) nfs_catnap(PZERO, error, "nfs_allocate");
7276                 } else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
7277                     error == NFSERR_BADSTATEID)) && clidrev != 0) {
7278                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
7279                 } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
7280                         error = EIO;
7281                 }
7282                 retrycnt++;
7283         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
7284             error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
7285             error == NFSERR_STALEDONTRECOVER ||
7286             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
7287             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
7288              expireret == 0 && clidrev != 0 && retrycnt < 4));
7289         if (error != 0 && retrycnt >= 4)
7290                 error = EIO;
7291         return (error);
7292 }
7293
7294 /*
7295  * The allocate RPC.
7296  */
7297 static int
7298 nfsrpc_allocaterpc(vnode_t vp, off_t off, off_t len, nfsv4stateid_t *stateidp,
7299     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p,
7300     void *stuff)
7301 {
7302         uint32_t *tl;
7303         int error;
7304         struct nfsrv_descript nfsd;
7305         struct nfsrv_descript *nd = &nfsd;
7306         nfsattrbit_t attrbits;
7307
7308         *attrflagp = 0;
7309         NFSCL_REQSTART(nd, NFSPROC_ALLOCATE, vp, cred);
7310         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
7311         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
7312         txdr_hyper(off, tl); tl += 2;
7313         txdr_hyper(len, tl); tl += 2;
7314         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7315         NFSGETATTR_ATTRBIT(&attrbits);
7316         nfsrv_putattrbit(nd, &attrbits);
7317         error = nfscl_request(nd, vp, p, cred, stuff);
7318         if (error != 0)
7319                 return (error);
7320         if (nd->nd_repstat == 0) {
7321                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7322                 error = nfsm_loadattr(nd, nap);
7323                 if (error == 0)
7324                         *attrflagp = NFS_LATTR_NOSHRINK;
7325         } else
7326                 error = nd->nd_repstat;
7327 nfsmout:
7328         m_freem(nd->nd_mrep);
7329         return (error);
7330 }
7331
7332 /*
7333  * Set up the XDR arguments for the LayoutGet operation.
7334  */
7335 static void
7336 nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
7337     uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype,
7338     int layoutlen, int usecurstateid)
7339 {
7340         uint32_t *tl;
7341
7342         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
7343             NFSX_STATEID);
7344         *tl++ = newnfs_false;           /* Don't signal availability. */
7345         *tl++ = txdr_unsigned(layouttype);
7346         *tl++ = txdr_unsigned(iomode);
7347         txdr_hyper(offset, tl);
7348         tl += 2;
7349         txdr_hyper(len, tl);
7350         tl += 2;
7351         txdr_hyper(minlen, tl);
7352         tl += 2;
7353         if (usecurstateid != 0) {
7354                 /* Special stateid for Current stateid. */
7355                 *tl++ = txdr_unsigned(1);
7356                 *tl++ = 0;
7357                 *tl++ = 0;
7358                 *tl++ = 0;
7359         } else {
7360                 *tl++ = txdr_unsigned(stateidp->seqid);
7361                 NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
7362                 *tl++ = stateidp->other[0];
7363                 *tl++ = stateidp->other[1];
7364                 *tl++ = stateidp->other[2];
7365         }
7366         *tl = txdr_unsigned(layoutlen);
7367 }
7368
7369 /*
7370  * Parse the reply for a successful LayoutGet operation.
7371  */
7372 static int
7373 nfsrv_parselayoutget(struct nfsmount *nmp, struct nfsrv_descript *nd,
7374     nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp)
7375 {
7376         uint32_t *tl;
7377         struct nfsclflayout *flp, *prevflp, *tflp;
7378         int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen;
7379         int m, mirrorcnt;
7380         uint64_t retlen, off;
7381         struct nfsfh *nfhp;
7382         uint8_t *cp;
7383         uid_t user;
7384         gid_t grp;
7385
7386         NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n");
7387         error = 0;
7388         flp = NULL;
7389         gotiomode = -1;
7390         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
7391         if (*tl++ != 0)
7392                 *retonclosep = 1;
7393         else
7394                 *retonclosep = 0;
7395         stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
7396         NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
7397             (int)stateidp->seqid);
7398         stateidp->other[0] = *tl++;
7399         stateidp->other[1] = *tl++;
7400         stateidp->other[2] = *tl++;
7401         cnt = fxdr_unsigned(int, *tl);
7402         NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
7403         if (cnt <= 0 || cnt > 10000) {
7404                 /* Don't accept more than 10000 layouts in reply. */
7405                 error = NFSERR_BADXDR;
7406                 goto nfsmout;
7407         }
7408         for (i = 0; i < cnt; i++) {
7409                 /* Dissect to the layout type. */
7410                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER +
7411                     3 * NFSX_UNSIGNED);
7412                 off = fxdr_hyper(tl); tl += 2;
7413                 retlen = fxdr_hyper(tl); tl += 2;
7414                 iomode = fxdr_unsigned(int, *tl++);
7415                 laytype = fxdr_unsigned(int, *tl);
7416                 NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype,
7417                     (uintmax_t)off, (uintmax_t)retlen, iomode);
7418                 /* Ignore length of layout body for now. */
7419                 if (laytype == NFSLAYOUT_NFSV4_1_FILES) {
7420                         /* Parse the File layout up to fhcnt. */
7421                         NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED +
7422                             NFSX_HYPER + NFSX_V4DEVICEID);
7423                         fhcnt = fxdr_unsigned(int, *(tl + 4 +
7424                             NFSX_V4DEVICEID / NFSX_UNSIGNED));
7425                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7426                         if (fhcnt < 0 || fhcnt > 100) {
7427                                 /* Don't accept more than 100 file handles. */
7428                                 error = NFSERR_BADXDR;
7429                                 goto nfsmout;
7430                         }
7431                         if (fhcnt > 0)
7432                                 flp = malloc(sizeof(*flp) + fhcnt *
7433                                     sizeof(struct nfsfh *), M_NFSFLAYOUT,
7434                                     M_WAITOK);
7435                         else
7436                                 flp = malloc(sizeof(*flp), M_NFSFLAYOUT,
7437                                     M_WAITOK);
7438                         flp->nfsfl_flags = NFSFL_FILE;
7439                         flp->nfsfl_fhcnt = 0;
7440                         flp->nfsfl_devp = NULL;
7441                         flp->nfsfl_off = off;
7442                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7443                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7444                         else
7445                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
7446                         flp->nfsfl_iomode = iomode;
7447                         if (gotiomode == -1)
7448                                 gotiomode = flp->nfsfl_iomode;
7449                         /* Ignore layout body length for now. */
7450                         NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
7451                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7452                         flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
7453                         NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
7454                         mtx_lock(&nmp->nm_mtx);
7455                         if (nmp->nm_minorvers > 1 && (flp->nfsfl_util &
7456                             NFSFLAYUTIL_IOADVISE_THRU_MDS) != 0)
7457                                 nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7458                         mtx_unlock(&nmp->nm_mtx);
7459                         flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
7460                         flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
7461                         NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n",
7462                             flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff);
7463                         for (j = 0; j < fhcnt; j++) {
7464                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7465                                 nfhlen = fxdr_unsigned(int, *tl);
7466                                 if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
7467                                         error = NFSERR_BADXDR;
7468                                         goto nfsmout;
7469                                 }
7470                                 nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
7471                                     M_NFSFH, M_WAITOK);
7472                                 flp->nfsfl_fh[j] = nfhp;
7473                                 flp->nfsfl_fhcnt++;
7474                                 nfhp->nfh_len = nfhlen;
7475                                 NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
7476                                 NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
7477                         }
7478                 } else if (laytype == NFSLAYOUT_FLEXFILE) {
7479                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED +
7480                             NFSX_HYPER);
7481                         mirrorcnt = fxdr_unsigned(int, *(tl + 2));
7482                         NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt);
7483                         if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) {
7484                                 error = NFSERR_BADXDR;
7485                                 goto nfsmout;
7486                         }
7487                         flp = malloc(sizeof(*flp) + mirrorcnt *
7488                             sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK);
7489                         flp->nfsfl_flags = NFSFL_FLEXFILE;
7490                         flp->nfsfl_mirrorcnt = mirrorcnt;
7491                         for (j = 0; j < mirrorcnt; j++)
7492                                 flp->nfsfl_ffm[j].devp = NULL;
7493                         flp->nfsfl_off = off;
7494                         if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7495                                 flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7496                         else
7497                                 flp->nfsfl_end = flp->nfsfl_off + retlen;
7498                         flp->nfsfl_iomode = iomode;
7499                         if (gotiomode == -1)
7500                                 gotiomode = flp->nfsfl_iomode;
7501                         flp->nfsfl_stripeunit = fxdr_hyper(tl);
7502                         NFSCL_DEBUG(4, "stripeunit=%ju\n",
7503                             (uintmax_t)flp->nfsfl_stripeunit);
7504                         for (j = 0; j < mirrorcnt; j++) {
7505                                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7506                                 k = fxdr_unsigned(int, *tl);
7507                                 if (k < 1 || k > 128) {
7508                                         error = NFSERR_BADXDR;
7509                                         goto nfsmout;
7510                                 }
7511                                 NFSCL_DEBUG(4, "servercnt=%d\n", k);
7512                                 for (l = 0; l < k; l++) {
7513                                         NFSM_DISSECT(tl, uint32_t *,
7514                                             NFSX_V4DEVICEID + NFSX_STATEID +
7515                                             2 * NFSX_UNSIGNED);
7516                                         if (l == 0) {
7517                                                 /* Just use the first server. */
7518                                                 NFSBCOPY(tl,
7519                                                     flp->nfsfl_ffm[j].dev,
7520                                                     NFSX_V4DEVICEID);
7521                                                 tl += (NFSX_V4DEVICEID /
7522                                                     NFSX_UNSIGNED);
7523                                                 tl++;
7524                                                 flp->nfsfl_ffm[j].st.seqid =
7525                                                     *tl++;
7526                                                 flp->nfsfl_ffm[j].st.other[0] =
7527                                                     *tl++;
7528                                                 flp->nfsfl_ffm[j].st.other[1] =
7529                                                     *tl++;
7530                                                 flp->nfsfl_ffm[j].st.other[2] =
7531                                                     *tl++;
7532                                                 NFSCL_DEBUG(4, "st.seqid=%u "
7533                                                  "st.o0=0x%x st.o1=0x%x "
7534                                                  "st.o2=0x%x\n",
7535                                                  flp->nfsfl_ffm[j].st.seqid,
7536                                                  flp->nfsfl_ffm[j].st.other[0],
7537                                                  flp->nfsfl_ffm[j].st.other[1],
7538                                                  flp->nfsfl_ffm[j].st.other[2]);
7539                                         } else
7540                                                 tl += ((NFSX_V4DEVICEID +
7541                                                     NFSX_STATEID +
7542                                                     NFSX_UNSIGNED) /
7543                                                     NFSX_UNSIGNED);
7544                                         fhcnt = fxdr_unsigned(int, *tl);
7545                                         NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7546                                         if (fhcnt < 1 ||
7547                                             fhcnt > NFSDEV_MAXVERS) {
7548                                                 error = NFSERR_BADXDR;
7549                                                 goto nfsmout;
7550                                         }
7551                                         for (m = 0; m < fhcnt; m++) {
7552                                                 NFSM_DISSECT(tl, uint32_t *,
7553                                                     NFSX_UNSIGNED);
7554                                                 nfhlen = fxdr_unsigned(int,
7555                                                     *tl);
7556                                                 NFSCL_DEBUG(4, "nfhlen=%d\n",
7557                                                     nfhlen);
7558                                                 if (nfhlen <= 0 || nfhlen >
7559                                                     NFSX_V4FHMAX) {
7560                                                         error = NFSERR_BADXDR;
7561                                                         goto nfsmout;
7562                                                 }
7563                                                 NFSM_DISSECT(cp, uint8_t *,
7564                                                     NFSM_RNDUP(nfhlen));
7565                                                 if (l == 0) {
7566                                                         flp->nfsfl_ffm[j].fhcnt 
7567                                                             = fhcnt;
7568                                                         nfhp = malloc(
7569                                                             sizeof(*nfhp) +
7570                                                             nfhlen - 1, M_NFSFH,
7571                                                             M_WAITOK);
7572                                                         flp->nfsfl_ffm[j].fh[m]
7573                                                             = nfhp;
7574                                                         nfhp->nfh_len = nfhlen;
7575                                                         NFSBCOPY(cp,
7576                                                             nfhp->nfh_fh,
7577                                                             nfhlen);
7578                                                         NFSCL_DEBUG(4,
7579                                                             "got fh\n");
7580                                                 }
7581                                         }
7582                                         /* Now, get the ffsd_user/ffds_group. */
7583                                         error = nfsrv_parseug(nd, 0, &user,
7584                                             &grp, curthread);
7585                                         NFSCL_DEBUG(4, "after parseu=%d\n",
7586                                             error);
7587                                         if (error == 0)
7588                                                 error = nfsrv_parseug(nd, 1,
7589                                                     &user, &grp, curthread);
7590                                         NFSCL_DEBUG(4, "aft parseg=%d\n",
7591                                             grp);
7592                                         if (error != 0)
7593                                                 goto nfsmout;
7594                                         NFSCL_DEBUG(4, "user=%d group=%d\n",
7595                                             user, grp);
7596                                         if (l == 0) {
7597                                                 flp->nfsfl_ffm[j].user = user;
7598                                                 flp->nfsfl_ffm[j].group = grp;
7599                                                 NFSCL_DEBUG(4,
7600                                                     "usr=%d grp=%d\n", user,
7601                                                     grp);
7602                                         }
7603                                 }
7604                         }
7605                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7606                         flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++);
7607 #ifdef notnow
7608                         /*
7609                          * At this time, there is no flag.
7610                          * NFSFLEXFLAG_IOADVISE_THRU_MDS might need to be
7611                          * added, or it may never exist?
7612                          */
7613                         mtx_lock(&nmp->nm_mtx);
7614                         if (nmp->nm_minorvers > 1 && (flp->nfsfl_fflags &
7615                             NFSFLEXFLAG_IOADVISE_THRU_MDS) != 0)
7616                                 nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7617                         mtx_unlock(&nmp->nm_mtx);
7618 #endif
7619                         flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl);
7620                         NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n",
7621                             flp->nfsfl_fflags, flp->nfsfl_statshint);
7622                 } else {
7623                         error = NFSERR_BADXDR;
7624                         goto nfsmout;
7625                 }
7626                 if (flp->nfsfl_iomode == gotiomode) {
7627                         /* Keep the list in increasing offset order. */
7628                         tflp = LIST_FIRST(flhp);
7629                         prevflp = NULL;
7630                         while (tflp != NULL &&
7631                             tflp->nfsfl_off < flp->nfsfl_off) {
7632                                 prevflp = tflp;
7633                                 tflp = LIST_NEXT(tflp, nfsfl_list);
7634                         }
7635                         if (prevflp == NULL)
7636                                 LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
7637                         else
7638                                 LIST_INSERT_AFTER(prevflp, flp,
7639                                     nfsfl_list);
7640                         NFSCL_DEBUG(4, "flp inserted\n");
7641                 } else {
7642                         printf("nfscl_layoutget(): got wrong iomode\n");
7643                         nfscl_freeflayout(flp);
7644                 }
7645                 flp = NULL;
7646         }
7647 nfsmout:
7648         NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error);
7649         if (error != 0 && flp != NULL)
7650                 nfscl_freeflayout(flp);
7651         return (error);
7652 }
7653
7654 /*
7655  * Parse a user/group digit string.
7656  */
7657 static int
7658 nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
7659     NFSPROC_T *p)
7660 {
7661         uint32_t *tl;
7662         char *cp, *str, str0[NFSV4_SMALLSTR + 1];
7663         uint32_t len = 0;
7664         int error = 0;
7665
7666         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7667         len = fxdr_unsigned(uint32_t, *tl);
7668         str = NULL;
7669         if (len > NFSV4_OPAQUELIMIT) {
7670                 error = NFSERR_BADXDR;
7671                 goto nfsmout;
7672         }
7673         NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len);
7674         if (len == 0) {
7675                 if (dogrp != 0)
7676                         *gidp = GID_NOGROUP;
7677                 else
7678                         *uidp = UID_NOBODY;
7679                 return (0);
7680         }
7681         if (len > NFSV4_SMALLSTR)
7682                 str = malloc(len + 1, M_TEMP, M_WAITOK);
7683         else
7684                 str = str0;
7685         NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
7686         NFSBCOPY(cp, str, len);
7687         str[len] = '\0';
7688         NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
7689         if (dogrp != 0)
7690                 error = nfsv4_strtogid(nd, str, len, gidp);
7691         else
7692                 error = nfsv4_strtouid(nd, str, len, uidp);
7693 nfsmout:
7694         if (len > NFSV4_SMALLSTR)
7695                 free(str, M_TEMP);
7696         NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error);
7697         return (error);
7698 }
7699
7700 /*
7701  * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
7702  * so that it does both an Open and a Layoutget.
7703  */
7704 static int
7705 nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7706     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7707     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7708     struct ucred *cred, NFSPROC_T *p)
7709 {
7710         struct nfscllayout *lyp;
7711         struct nfsclflayout *flp;
7712         struct nfsclflayouthead flh;
7713         int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
7714         int layouttype, laystat;
7715         nfsv4stateid_t stateid;
7716         struct nfsclsession *tsep;
7717
7718         error = 0;
7719         if (NFSHASFLEXFILE(nmp))
7720                 layouttype = NFSLAYOUT_FLEXFILE;
7721         else
7722                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
7723         /*
7724          * If lyp is returned non-NULL, there will be a refcnt (shared lock)
7725          * on it, iff flp != NULL or a lock (exclusive lock) on it iff
7726          * flp == NULL.
7727          */
7728         lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, mode, &flp,
7729             &recalled);
7730         NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
7731         if (lyp == NULL)
7732                 islocked = 0;
7733         else if (flp != NULL)
7734                 islocked = 1;
7735         else
7736                 islocked = 2;
7737         if ((lyp == NULL || flp == NULL) && recalled == 0) {
7738                 LIST_INIT(&flh);
7739                 tsep = nfsmnt_mdssession(nmp);
7740                 layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
7741                     3 * NFSX_UNSIGNED);
7742                 if (lyp == NULL)
7743                         usecurstateid = 1;
7744                 else {
7745                         usecurstateid = 0;
7746                         stateid.seqid = lyp->nfsly_stateid.seqid;
7747                         stateid.other[0] = lyp->nfsly_stateid.other[0];
7748                         stateid.other[1] = lyp->nfsly_stateid.other[1];
7749                         stateid.other[2] = lyp->nfsly_stateid.other[2];
7750                 }
7751                 error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
7752                     newfhp, newfhlen, mode, op, name, namelen,
7753                     dpp, &stateid, usecurstateid, layouttype, layoutlen,
7754                     &retonclose, &flh, &laystat, cred, p);
7755                 NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
7756                     laystat, error);
7757                 laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
7758                     &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat,
7759                     &islocked, cred, p);
7760         } else
7761                 error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
7762                     mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
7763         if (islocked == 2)
7764                 nfscl_rellayout(lyp, 1);
7765         else if (islocked == 1)
7766                 nfscl_rellayout(lyp, 0);
7767         return (error);
7768 }
7769
7770 /*
7771  * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
7772  * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
7773  * handled by nfsrpc_openrpc().
7774  * For the case where op == NULL, dvp is the directory.  When op != NULL, it
7775  * can be NULL.
7776  */
7777 static int
7778 nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
7779     int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
7780     struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
7781     nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
7782     int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
7783     int *laystatp, struct ucred *cred, NFSPROC_T *p)
7784 {
7785         uint32_t *tl;
7786         struct nfsrv_descript nfsd, *nd = &nfsd;
7787         struct nfscldeleg *ndp = NULL;
7788         struct nfsvattr nfsva;
7789         struct nfsclsession *tsep;
7790         uint32_t rflags, deleg;
7791         nfsattrbit_t attrbits;
7792         int error, ret, acesize, limitby, iomode;
7793
7794         *dpp = NULL;
7795         *laystatp = ENXIO;
7796         nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
7797             0, 0, cred);
7798         NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
7799         *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
7800         *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
7801         *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
7802         tsep = nfsmnt_mdssession(nmp);
7803         *tl++ = tsep->nfsess_clientid.lval[0];
7804         *tl = tsep->nfsess_clientid.lval[1];
7805         nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7806         NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7807         *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
7808         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
7809         nfsm_strtom(nd, name, namelen);
7810         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7811         *tl = txdr_unsigned(NFSV4OP_GETATTR);
7812         NFSZERO_ATTRBIT(&attrbits);
7813         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
7814         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
7815         nfsrv_putattrbit(nd, &attrbits);
7816         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
7817         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
7818         if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
7819                 iomode = NFSLAYOUTIOMODE_RW;
7820         else
7821                 iomode = NFSLAYOUTIOMODE_READ;
7822         nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
7823             layouttype, layoutlen, usecurstateid);
7824         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
7825             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
7826         if (error != 0)
7827                 return (error);
7828         NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
7829         if (nd->nd_repstat != 0)
7830                 *laystatp = nd->nd_repstat;
7831         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
7832                 /* ND_NOMOREDATA will be set if the Open operation failed. */
7833                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7834                     6 * NFSX_UNSIGNED);
7835                 op->nfso_stateid.seqid = *tl++;
7836                 op->nfso_stateid.other[0] = *tl++;
7837                 op->nfso_stateid.other[1] = *tl++;
7838                 op->nfso_stateid.other[2] = *tl;
7839                 rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
7840                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
7841                 if (error != 0)
7842                         goto nfsmout;
7843                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
7844                 deleg = fxdr_unsigned(u_int32_t, *tl);
7845                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
7846                     deleg == NFSV4OPEN_DELEGATEWRITE) {
7847                         if (!(op->nfso_own->nfsow_clp->nfsc_flags &
7848                               NFSCLFLAGS_FIRSTDELEG))
7849                                 op->nfso_own->nfsow_clp->nfsc_flags |=
7850                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
7851                         ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
7852                             M_NFSCLDELEG, M_WAITOK);
7853                         LIST_INIT(&ndp->nfsdl_owner);
7854                         LIST_INIT(&ndp->nfsdl_lock);
7855                         ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
7856                         ndp->nfsdl_fhlen = newfhlen;
7857                         NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
7858                         newnfs_copyincred(cred, &ndp->nfsdl_cred);
7859                         nfscl_lockinit(&ndp->nfsdl_rwlock);
7860                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
7861                             NFSX_UNSIGNED);
7862                         ndp->nfsdl_stateid.seqid = *tl++;
7863                         ndp->nfsdl_stateid.other[0] = *tl++;
7864                         ndp->nfsdl_stateid.other[1] = *tl++;
7865                         ndp->nfsdl_stateid.other[2] = *tl++;
7866                         ret = fxdr_unsigned(int, *tl);
7867                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
7868                                 ndp->nfsdl_flags = NFSCLDL_WRITE;
7869                                 /*
7870                                  * Indicates how much the file can grow.
7871                                  */
7872                                 NFSM_DISSECT(tl, u_int32_t *,
7873                                     3 * NFSX_UNSIGNED);
7874                                 limitby = fxdr_unsigned(int, *tl++);
7875                                 switch (limitby) {
7876                                 case NFSV4OPEN_LIMITSIZE:
7877                                         ndp->nfsdl_sizelimit = fxdr_hyper(tl);
7878                                         break;
7879                                 case NFSV4OPEN_LIMITBLOCKS:
7880                                         ndp->nfsdl_sizelimit =
7881                                             fxdr_unsigned(u_int64_t, *tl++);
7882                                         ndp->nfsdl_sizelimit *=
7883                                             fxdr_unsigned(u_int64_t, *tl);
7884                                         break;
7885                                 default:
7886                                         error = NFSERR_BADXDR;
7887                                         goto nfsmout;
7888                                 };
7889                         } else
7890                                 ndp->nfsdl_flags = NFSCLDL_READ;
7891                         if (ret != 0)
7892                                 ndp->nfsdl_flags |= NFSCLDL_RECALL;
7893                         error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, false,
7894                             &ret, &acesize, p);
7895                         if (error != 0)
7896                                 goto nfsmout;
7897                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
7898                         error = NFSERR_BADXDR;
7899                         goto nfsmout;
7900                 }
7901                 if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
7902                     nfscl_assumeposixlocks)
7903                         op->nfso_posixlock = 1;
7904                 else
7905                         op->nfso_posixlock = 0;
7906                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7907                 /* If the 2nd element == NFS_OK, the Getattr succeeded. */
7908                 if (*++tl == 0) {
7909                         error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
7910                             NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
7911                             NULL, NULL, NULL, p, cred);
7912                         if (error != 0)
7913                                 goto nfsmout;
7914                         if (ndp != NULL) {
7915                                 ndp->nfsdl_change = nfsva.na_filerev;
7916                                 ndp->nfsdl_modtime = nfsva.na_mtime;
7917                                 ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
7918                                 *dpp = ndp;
7919                                 ndp = NULL;
7920                         }
7921                         /*
7922                          * At this point, the Open has succeeded, so set
7923                          * nd_repstat = NFS_OK.  If the Layoutget failed,
7924                          * this function just won't return a layout.
7925                          */
7926                         if (nd->nd_repstat == 0) {
7927                                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7928                                 *laystatp = fxdr_unsigned(int, *++tl);
7929                                 if (*laystatp == 0) {
7930                                         error = nfsrv_parselayoutget(nmp, nd,
7931                                             stateidp, retonclosep, flhp);
7932                                         if (error != 0)
7933                                                 *laystatp = error;
7934                                 }
7935                         } else
7936                                 nd->nd_repstat = 0;     /* Return 0 for Open. */
7937                 }
7938         }
7939         if (nd->nd_repstat != 0 && error == 0)
7940                 error = nd->nd_repstat;
7941 nfsmout:
7942         free(ndp, M_NFSCLDELEG);
7943         m_freem(nd->nd_mrep);
7944         return (error);
7945 }
7946
7947 /*
7948  * Similar to nfsrpc_createv4(), but also does the LayoutGet operation.
7949  * Used only for mounts with pNFS enabled.
      *
      * Builds one NFSPROC_CREATELAYGET compound: an Open (with create) of
      * "name" in dvp, SaveFH, GetFH and Getattr for the new file, PutFH and
      * Getattr for the directory, then RestoreFH and LayoutGet.
      *
      * Outputs (all reset on entry):
      *  *laystatp   - ENXIO until the reply says otherwise; afterwards a
      *                non-zero nd_repstat, the LayoutGet status word from the
      *                reply, or the error from nfsrv_parselayoutget().
      *  *unlockedp  - set to 1 once nfscl_openrelease() has been called.
      *  *nfhpp      - handed to nfscl_mtofh() to receive the new file handle.
      *  *dpp        - the delegation parsed from the Open reply, if any; only
      *                set when 0 is returned, otherwise it is freed here.
      *  *attrflagp  - handed to nfscl_mtofh() together with nnap.
      *  *dattrflagp - set to 1 once dnap has been loaded.
      *
      * dstuff is passed through to nfscl_request() unchanged.
      *
      * Returns 0 on success, ENAMETOOLONG when namelen exceeds NFS_MAXNAMLEN,
      * otherwise the error from nfscl_request(), from parsing the reply, or
      * the non-zero nd_repstat.
7950  */
7951 static int
7952 nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
7953     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
7954     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
7955     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
7956     int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp,
7957     int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
7958     struct nfsclflayouthead *flhp, int *laystatp)
7959 {
7960         uint32_t *tl;
7961         int error = 0, deleg, newone, ret, acesize, limitby;
7962         struct nfsrv_descript nfsd, *nd = &nfsd;
7963         struct nfsclopen *op;
7964         struct nfscldeleg *dp = NULL;
7965         struct nfsnode *np;
7966         struct nfsfh *nfhp;
7967         struct nfsclsession *tsep;
7968         nfsattrbit_t attrbits;
7969         nfsv4stateid_t stateid;
7970         struct nfsmount *nmp;
7971
7972         nmp = VFSTONFS(dvp->v_mount);
7973         np = VTONFS(dvp);
             /* Reset every output so the early returns leave defined values. */
7974         *laystatp = ENXIO;
7975         *unlockedp = 0;
7976         *nfhpp = NULL;
7977         *dpp = NULL;
7978         *attrflagp = 0;
7979         *dattrflagp = 0;
7980         if (namelen > NFS_MAXNAMLEN)
7981                 return (ENAMETOOLONG);
7982         NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp, cred);
7983         /*
7984          * For V4, this is actually an Open op.
7985          */
7986         NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
7987         *tl++ = txdr_unsigned(owp->nfsow_seqid);
7988         *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
7989             NFSV4OPEN_ACCESSREAD);
7990         *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
             /* The client id words come from the session nfsmnt_mdssession() returns. */
7991         tsep = nfsmnt_mdssession(nmp);
7992         *tl++ = tsep->nfsess_clientid.lval[0];
7993         *tl = tsep->nfsess_clientid.lval[1];
7994         nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
7995         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7996         *tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
             /* Pick the create mode from O_EXCL and the session type. */
7997         if ((fmode & O_EXCL) != 0) {
7998                 if (NFSHASSESSPERSIST(nmp)) {
7999                         /* Use GUARDED for persistent sessions. */
8000                         *tl = txdr_unsigned(NFSCREATE_GUARDED);
8001                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
8002                 } else {
8003                         /* Otherwise, use EXCLUSIVE4_1. */
8004                         *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
8005                         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
8006                         *tl++ = cverf.lval[0];
8007                         *tl = cverf.lval[1];
8008                         nfscl_fillsattr(nd, vap, dvp, 0, 0);
8009                 }
8010         } else {
8011                 *tl = txdr_unsigned(NFSCREATE_UNCHECKED);
8012                 nfscl_fillsattr(nd, vap, dvp, 0, 0);
8013         }
8014         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8015         *tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
8016         nfsm_strtom(nd, name, namelen);
8017         /* Get the new file's handle and attributes, plus save the FH. */
8018         NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
8019         *tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
8020         *tl++ = txdr_unsigned(NFSV4OP_GETFH);
8021         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8022         NFSGETATTR_ATTRBIT(&attrbits);
8023         nfsrv_putattrbit(nd, &attrbits);
8024         /* Get the directory's post-op attributes. */
8025         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8026         *tl = txdr_unsigned(NFSV4OP_PUTFH);
8027         (void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
8028         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
8029         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8030         nfsrv_putattrbit(nd, &attrbits);
             /* Return to the saved (new file) handle and ask for its layout. */
8031         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
8032         *tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
8033         *tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
8034         nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
8035             layouttype, layoutlen, usecurstateid);
8036         error = nfscl_request(nd, dvp, p, cred, dstuff);
8037         if (error != 0)
8038                 return (error);
8039         NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
8040             error);
             /* Until a LayoutGet status is parsed, report the compound's status. */
8041         if (nd->nd_repstat != 0)
8042                 *laystatp = nd->nd_repstat;
8043         NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
8044         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8045                 NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
                     /*
                      * Only the open stateid (the first four words) is kept;
                      * the other six words of this dissect are skipped unread.
                      */
8046                 NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8047                     6 * NFSX_UNSIGNED);
8048                 stateid.seqid = *tl++;
8049                 stateid.other[0] = *tl++;
8050                 stateid.other[1] = *tl++;
8051                 stateid.other[2] = *tl;
8052                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
8053                 if (error != 0)
8054                         goto nfsmout;
                     /* Next word is the delegation type granted with the Open. */
8055                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
8056                 deleg = fxdr_unsigned(int, *tl);
8057                 if (deleg == NFSV4OPEN_DELEGATEREAD ||
8058                     deleg == NFSV4OPEN_DELEGATEWRITE) {
8059                         if (!(owp->nfsow_clp->nfsc_flags &
8060                               NFSCLFLAGS_FIRSTDELEG))
8061                                 owp->nfsow_clp->nfsc_flags |=
8062                                   (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
                             /*
                              * The extra NFSX_V4FHMAX bytes are presumably for
                              * the file handle copied into nfsdl_fh further down.
                              */
8063                         dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
8064                             M_NFSCLDELEG, M_WAITOK);
8065                         LIST_INIT(&dp->nfsdl_owner);
8066                         LIST_INIT(&dp->nfsdl_lock);
8067                         dp->nfsdl_clp = owp->nfsow_clp;
8068                         newnfs_copyincred(cred, &dp->nfsdl_cred);
8069                         nfscl_lockinit(&dp->nfsdl_rwlock);
8070                         NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
8071                             NFSX_UNSIGNED);
8072                         dp->nfsdl_stateid.seqid = *tl++;
8073                         dp->nfsdl_stateid.other[0] = *tl++;
8074                         dp->nfsdl_stateid.other[1] = *tl++;
8075                         dp->nfsdl_stateid.other[2] = *tl++;
                             /* Word after the stateid; non-zero sets NFSCLDL_RECALL below. */
8076                         ret = fxdr_unsigned(int, *tl);
8077                         if (deleg == NFSV4OPEN_DELEGATEWRITE) {
8078                                 dp->nfsdl_flags = NFSCLDL_WRITE;
8079                                 /*
8080                                  * Indicates how much the file can grow.
8081                                  */
8082                                 NFSM_DISSECT(tl, u_int32_t *,
8083                                     3 * NFSX_UNSIGNED);
8084                                 limitby = fxdr_unsigned(int, *tl++);
8085                                 switch (limitby) {
8086                                 case NFSV4OPEN_LIMITSIZE:
8087                                         dp->nfsdl_sizelimit = fxdr_hyper(tl);
8088                                         break;
8089                                 case NFSV4OPEN_LIMITBLOCKS:
                                             /* The limit is the product of the two words. */
8090                                         dp->nfsdl_sizelimit =
8091                                             fxdr_unsigned(u_int64_t, *tl++);
8092                                         dp->nfsdl_sizelimit *=
8093                                             fxdr_unsigned(u_int64_t, *tl);
8094                                         break;
8095                                 default:
8096                                         error = NFSERR_BADXDR;
8097                                         goto nfsmout;
8098                                 };
8099                         } else {
8100                                 dp->nfsdl_flags = NFSCLDL_READ;
8101                         }
8102                         if (ret != 0)
8103                                 dp->nfsdl_flags |= NFSCLDL_RECALL;
8104                         error = nfsrv_dissectace(nd, &dp->nfsdl_ace, false,
8105                             &ret, &acesize, p);
8106                         if (error != 0)
8107                                 goto nfsmout;
8108                 } else if (deleg != NFSV4OPEN_DELEGATENONE) {
8109                         error = NFSERR_BADXDR;
8110                         goto nfsmout;
8111                 }
8112
8113                 /* Now, we should have the status for the SaveFH. */
8114                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
                     /* The second word of the pair is the status; 0 means success. */
8115                 if (*++tl == 0) {
8116                         NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
8117                         /*
8118                          * Now, process the GetFH and Getattr for the newly
8119                          * created file. nfscl_mtofh() will set
8120                          * ND_NOMOREDATA if these weren't successful.
8121                          */
8122                         error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
8123                         NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
8124                         if (error != 0)
8125                                 goto nfsmout;
8126                 } else
8127                         nd->nd_flag |= ND_NOMOREDATA;
8128                 /* Now we have the PutFH and Getattr for the directory. */
8129                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8130                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8131                         if (*++tl != 0)
8132                                 nd->nd_flag |= ND_NOMOREDATA;
8133                         else {
8134                                 NFSM_DISSECT(tl, uint32_t *, 2 *
8135                                     NFSX_UNSIGNED);
8136                                 if (*++tl != 0)
8137                                         nd->nd_flag |= ND_NOMOREDATA;
8138                         }
8139                 }
8140                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8141                         /* Load the directory attributes. */
8142                         error = nfsm_loadattr(nd, dnap);
8143                         NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
8144                         if (error != 0)
8145                                 goto nfsmout;
8146                         *dattrflagp = 1;
                             /* Seed the delegation only if the new file's attributes were loaded. */
8147                         if (dp != NULL && *attrflagp != 0) {
8148                                 dp->nfsdl_change = nnap->na_filerev;
8149                                 dp->nfsdl_modtime = nnap->na_mtime;
8150                                 dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
8151                         }
8152                         /*
8153                          * We can now complete the Open state.
8154                          */
8155                         nfhp = *nfhpp;
8156                         if (dp != NULL) {
8157                                 dp->nfsdl_fhlen = nfhp->nfh_len;
8158                                 NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
8159                                     nfhp->nfh_len);
8160                         }
8161                         /*
8162                          * Get an Open structure that will be
8163                          * attached to the OpenOwner, acquired already.
8164                          */
8165                         error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len, 
8166                             (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
8167                             cred, p, NULL, &op, &newone, NULL, 0, false);
8168                         if (error != 0)
8169                                 goto nfsmout;
8170                         op->nfso_stateid = stateid;
8171                         newnfs_copyincred(cred, &op->nfso_cred);
8172
                             /* error is 0 here; tell the caller the release has been done. */
8173                         nfscl_openrelease(nmp, op, error, newone);
8174                         *unlockedp = 1;
8175
8176                         /* Now, handle the RestoreFH and LayoutGet. */
8177                         if (nd->nd_repstat == 0) {
                                     /*
                                      * Four words are dissected for the two ops;
                                      * the last one is taken as the LayoutGet
                                      * status.
                                      */
8178                                 NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
8179                                 *laystatp = fxdr_unsigned(int, *(tl + 3));
8180                                 if (*laystatp == 0) {
8181                                         error = nfsrv_parselayoutget(nmp, nd,
8182                                             stateidp, retonclosep, flhp);
                                             /*
                                              * NOTE(review): error stays set, so
                                              * the function also returns it and
                                              * frees dp at nfsmout.
                                              */
8183                                         if (error != 0)
8184                                                 *laystatp = error;
8185                                 }
8186                                 NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
8187                                     error);
8188                         } else
                                     /* Open state is complete; clear the status so 0 is returned. */
8189                                 nd->nd_repstat = 0;
8190                 }
8191         }
8192         if (nd->nd_repstat != 0 && error == 0)
8193                 error = nd->nd_repstat;
8194         if (error == NFSERR_STALECLIENTID)
8195                 nfscl_initiate_recovery(owp->nfsow_clp);
8196 nfsmout:
8197         NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
             /* The delegation is handed to the caller only on success. */
8198         if (error == 0)
8199                 *dpp = dp;
8200         else
8201                 free(dp, M_NFSCLDELEG);
8202         m_freem(nd->nd_mrep);
8203         return (error);
8204 }
8205
8206 /*
8207  * Similar to nfsrpc_getopenlayout(), except that it is used for the Create
      * case.
      *
      * Calls nfsrpc_createlayout() to do the Open/create and the LayoutGet in
      * one RPC, then hands the LayoutGet status to nfsrpc_layoutgetres().
      * All arguments are passed straight through to nfsrpc_createlayout().
      *
      * Returns the value from nfsrpc_createlayout(); the result of the layout
      * processing is not returned to the caller.
8208  */
8209 static int
8210 nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
8211     nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
8212     struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
8213     struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
8214     int *dattrflagp, void *dstuff, int *unlockedp)
8215 {
8216         struct nfscllayout *lyp;
8217         struct nfsclflayouthead flh;
8218         struct nfsfh *nfhp;
8219         struct nfsclsession *tsep;
8220         struct nfsmount *nmp;
8221         nfsv4stateid_t stateid;
8222         int error, layoutlen, layouttype, retonclose, laystat;
8223
8224         error = 0;
8225         nmp = VFSTONFS(dvp->v_mount);
             /* Ask for a Flexible File layout only when the mount has that set. */
8226         if (NFSHASFLEXFILE(nmp))
8227                 layouttype = NFSLAYOUT_FLEXFILE;
8228         else
8229                 layouttype = NFSLAYOUT_NFSV4_1_FILES;
8230         LIST_INIT(&flh);
8231         tsep = nfsmnt_mdssession(nmp);
             /* Layout size limit: nfsess_maxcache less a stateid and three words. */
8232         layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
             /*
              * usecurstateid is 1.  laystat is always set by the callee (it
              * starts at ENXIO).  NOTE(review): stateid and retonclose are not
              * initialized here and may still be unset when laystat != 0;
              * nfsrpc_layoutgetres() only passes them on when laystat == 0.
              */
8233         error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
8234             owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
8235             dstuff, unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
8236             &flh, &laystat);
8237         NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
8238             laystat, error);
8239         lyp = NULL;
             /*
              * nfsrpc_layoutgetres() is called either way; the new file's
              * handle is only supplied when the LayoutGet succeeded.
              */
8240         if (laystat == 0) {
8241                 nfhp = *nfhpp;
8242                 laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
8243                     nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
8244                     layouttype, laystat, NULL, cred, p);
8245         } else
8246                 laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
8247                     retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL,
8248                     cred, p);
             /* Presumably drops the reference nfscl_layout() took on lyp. */
8249         if (laystat == 0)
8250                 nfscl_rellayout(lyp, 0);
8251         return (error);
8252 }
8253
8254 /*
8255  * Process the results of a layoutget() operation.
      *
      * laystat is the status of the LayoutGet:
      *  - NFSERR_UNKNLAYOUTTYPE switches the mount to Flexible File layouts,
      *    or, if that is already set and layouttype was NFSLAYOUT_FLEXFILE,
      *    clears NFSSTA_PNFS and NFSSTA_FLEXFILE on the mount.
      *  - 0 adds the device info for each layout on flhp (once per mirror for
      *    Flexible File layouts), fetching it with nfsrpc_getdeviceinfo()
      *    when the first nfscl_adddevinfo() call fails, and then calls
      *    nfscl_layout().
      *
      * *islockedp, when not NULL, is set to 1 if nfscl_layout() succeeded.
      * notifybit is only passed on to nfsrpc_getdeviceinfo().
      *
      * Returns the final value of laystat (0 on success).
8256  */
8257 static int
8258 nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
8259     int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
8260     struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
8261     int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
8262 {
8263         struct nfsclflayout *tflp;
8264         struct nfscldevinfo *dip;
8265         uint8_t *dev;
8266         int i, mirrorcnt;
8267
             /* nm_state is changed with the mount lock held. */
8268         if (laystat == NFSERR_UNKNLAYOUTTYPE) {
8269                 NFSLOCKMNT(nmp);
8270                 if (!NFSHASFLEXFILE(nmp)) {
8271                         /* Switch to using Flex File Layout. */
8272                         nmp->nm_state |= NFSSTA_FLEXFILE;
8273                 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
8274                         /* Disable pNFS. */
8275                         NFSCL_DEBUG(1, "disable PNFS\n");
8276                         nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
8277                 }
8278                 NFSUNLOCKMNT(nmp);
8279         }
8280         if (laystat == 0) {
8281                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
8282                 LIST_FOREACH(tflp, flhp, nfsfl_list) {
                             /* Only Flexible File layouts have more than one mirror here. */
8283                         if (layouttype == NFSLAYOUT_FLEXFILE)
8284                                 mirrorcnt = tflp->nfsfl_mirrorcnt;
8285                         else
8286                                 mirrorcnt = 1;
8287                         for (i = 0; i < mirrorcnt; i++) {
                                     /* First try without a devinfo structure. */
8288                                 laystat = nfscl_adddevinfo(nmp, NULL, i, tflp);
8289                                 NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
8290                                 if (laystat != 0) {
8291                                         if (layouttype == NFSLAYOUT_FLEXFILE)
8292                                                 dev = tflp->nfsfl_ffm[i].dev;
8293                                         else
8294                                                 dev = tflp->nfsfl_dev;
                                             /* Fetch the device info and retry the add with it. */
8295                                         laystat = nfsrpc_getdeviceinfo(nmp, dev,
8296                                             layouttype, notifybit, &dip, cred,
8297                                             p);
8298                                         NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
8299                                             laystat);
8300                                         if (laystat != 0)
8301                                                 goto out;
8302                                         laystat = nfscl_adddevinfo(nmp, dip, i,
8303                                             tflp);
                                             /*
                                              * A failure here is only logged; the
                                              * loop carries on and laystat is
                                              * overwritten by the next iteration,
                                              * if there is one.
                                              */
8304                                         if (laystat != 0)
8305                                                 printf("nfsrpc_layoutgetresout"
8306                                                     ": cannot add\n");
8307                                 }
8308                         }
8309                 }
8310         }
8311 out:
8312         if (laystat == 0) {
8313                 /*
8314                  * nfscl_layout() always returns with the nfsly_lock
8315                  * set to a refcnt (shared lock).
8316                  * Passing in dvp is sufficient, since it is only used to
8317                  * get the fsid for the file system.
8318                  */
8319                 laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
8320                     layouttype, retonclose, flhp, lypp, cred, p);
8321                 NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
8322                     laystat);
8323                 if (laystat == 0 && islockedp != NULL)
8324                         *islockedp = 1;
8325         }
8326         return (laystat);
8327 }
8328
8329 /*
8330  * nfs copy_file_range operation.
      *
      * Wrapper around nfsrpc_copyrpc() that gets a stateid for each file and
      * retries the RPC for the recoverable errors listed in the loop
      * condition below.
      *
      * On success *inoffp and *outoffp are advanced by *lenp (the length set
      * by nfsrpc_copyrpc()), and *must_commitp is set to true when the
      * reply's commit level is not NFSWRITE_FILESYNC; it is never cleared
      * here.  The attribute arguments and "consecutive" are passed through to
      * nfsrpc_copyrpc().  "flags" is not referenced in this function.
      *
      * Returns 0, an error from nfsrpc_copyrpc(), or EIO (see the end of the
      * function).
8331  */
8332 int
8333 nfsrpc_copy_file_range(vnode_t invp, off_t *inoffp, vnode_t outvp,
8334     off_t *outoffp, size_t *lenp, unsigned int flags, int *inattrflagp,
8335     struct nfsvattr *innap, int *outattrflagp, struct nfsvattr *outnap,
8336     struct ucred *cred, bool consecutive, bool *must_commitp)
8337 {
8338         int commit, error, expireret = 0, retrycnt;
8339         u_int32_t clidrev = 0;
8340         struct nfsmount *nmp = VFSTONFS(invp->v_mount);
8341         struct nfsfh *innfhp = NULL, *outnfhp = NULL;
8342         nfsv4stateid_t instateid, outstateid;
8343         void *inlckp, *outlckp;
8344
             /* clidrev stays 0 when there is no client structure. */
8345         if (nmp->nm_clp != NULL)
8346                 clidrev = nmp->nm_clp->nfsc_clientidrev;
8347         innfhp = VTONFS(invp)->n_fhp;
8348         outnfhp = VTONFS(outvp)->n_fhp;
8349         retrycnt = 0;
8350         do {
8351                 /* Get both stateids. */
8352                 inlckp = NULL;
8353                 nfscl_getstateid(invp, innfhp->nfh_fh, innfhp->nfh_len,
8354                     NFSV4OPEN_ACCESSREAD, 0, NULL, curthread, &instateid,
8355                     &inlckp);
8356                 outlckp = NULL;
8357                 nfscl_getstateid(outvp, outnfhp->nfh_fh, outnfhp->nfh_len,
8358                     NFSV4OPEN_ACCESSWRITE, 0, NULL, curthread, &outstateid,
8359                     &outlckp);
8360
8361                 error = nfsrpc_copyrpc(invp, *inoffp, outvp, *outoffp, lenp,
8362                     &instateid, &outstateid, innap, inattrflagp, outnap,
8363                     outattrflagp, consecutive, &commit, cred, curthread);
8364                 if (error == 0) {
8365                         if (commit != NFSWRITE_FILESYNC)
8366                                 *must_commitp = true;
8367                         *inoffp += *lenp;
8368                         *outoffp += *lenp;
8369                 } else if (error == NFSERR_STALESTATEID)
8370                         nfscl_initiate_recovery(nmp->nm_clp);
                     /* Drop whatever nfscl_getstateid() handed back in the lock pointers. */
8371                 if (inlckp != NULL)
8372                         nfscl_lockderef(inlckp);
8373                 if (outlckp != NULL)
8374                         nfscl_lockderef(outlckp);
                     /*
                      * Classify the error: pause (presumably a short sleep)
                      * before retrying, check for an expired client, or give
                      * up with EIO for a bad stateid on an interruptible mount.
                      */
8375                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8376                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8377                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8378                         (void) nfs_catnap(PZERO, error, "nfs_cfr");
8379                 } else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
8380                     error == NFSERR_BADSTATEID)) && clidrev != 0) {
8381                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8382                             curthread);
8383                 } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
8384                         error = EIO;
8385                 }
8386                 retrycnt++;
             /*
              * OLDSTATEID is retried up to 20 times and EXPIRED/BADSTATEID up
              * to 4; the other errors listed have no retry limit here.
              */
8387         } while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
8388             error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8389               error == NFSERR_STALEDONTRECOVER ||
8390             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8391             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8392              expireret == 0 && clidrev != 0 && retrycnt < 4));
             /*
              * Any error left after four or more attempts becomes EIO.
              * NOTE(review): the loop cannot exit with the three errors named
              * below, so only the retrycnt test can fire here.
              */
8393         if (error != 0 && (retrycnt >= 4 ||
8394             error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8395               error == NFSERR_STALEDONTRECOVER))
8396                 error = EIO;
8397         return (error);
8398 }
8399
8400 /*
8401  * The copy RPC.
      *
      * Sends one NFSPROC_COPY compound made of Getattr, PutFH (outvp), Copy
      * and Getattr, and parses the reply.
      *
      * *lenp is the requested length on entry; it is clamped to
      * nfs_maxcopyrange for the request, zeroed, and set to the length from
      * the reply only when the final nd_repstat is 0.
      * *inattrflagp is set to 1 when innap has been loaded.
      * *outattrflagp is set to NFS_LATTR_NOSHRINK when outnap has been loaded.
      * *commitp starts as NFSWRITE_UNSTABLE and is replaced by the commit
      * level from the Copy reply.
      *
      * Returns 0, the error from nfscl_request() or from parsing, or the
      * final nd_repstat.
8402  */
8403 static int
8404 nfsrpc_copyrpc(vnode_t invp, off_t inoff, vnode_t outvp, off_t outoff,
8405     size_t *lenp, nfsv4stateid_t *instateidp, nfsv4stateid_t *outstateidp,
8406     struct nfsvattr *innap, int *inattrflagp, struct nfsvattr *outnap,
8407     int *outattrflagp, bool consecutive, int *commitp, struct ucred *cred,
8408     NFSPROC_T *p)
8409 {
8410         uint32_t *tl;
8411         int error;
8412         struct nfsrv_descript nfsd;
8413         struct nfsrv_descript *nd = &nfsd;
8414         struct nfsmount *nmp;
8415         nfsattrbit_t attrbits;
8416         uint64_t len;
8417
8418         nmp = VFSTONFS(outvp->v_mount);
8419         *inattrflagp = *outattrflagp = 0;
8420         *commitp = NFSWRITE_UNSTABLE;
             /* Report 0 bytes copied unless the reply says otherwise. */
8421         len = *lenp;
8422         *lenp = 0;
8423         if (len > nfs_maxcopyrange)
8424                 len = nfs_maxcopyrange;
8425         NFSCL_REQSTART(nd, NFSPROC_COPY, invp, cred);
             /* Getattr first; the reply handling below loads it into innap. */
8426         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8427         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8428         NFSGETATTR_ATTRBIT(&attrbits);
8429         nfsrv_putattrbit(nd, &attrbits);
8430         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8431         *tl = txdr_unsigned(NFSV4OP_PUTFH);
8432         (void)nfsm_fhtom(nmp, nd, VTONFS(outvp)->n_fhp->nfh_fh,
8433             VTONFS(outvp)->n_fhp->nfh_len, 0);
8434         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8435         *tl = txdr_unsigned(NFSV4OP_COPY);
8436         nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID);
8437         nfsm_stateidtom(nd, outstateidp, NFSSTATEID_PUTSTATEID);
             /* Three hypers (offsets and length), three words, then the next op. */
8438         NFSM_BUILD(tl, uint32_t *, 3 * NFSX_HYPER + 4 * NFSX_UNSIGNED);
8439         txdr_hyper(inoff, tl); tl += 2;
8440         txdr_hyper(outoff, tl); tl += 2;
8441         txdr_hyper(len, tl); tl += 2;
8442         if (consecutive)
8443                 *tl++ = newnfs_true;
8444         else
8445                 *tl++ = newnfs_false;
             /*
              * Presumably the "synchronous" flag followed by an empty source
              * server list (RFC 7862 COPY arguments) - TODO confirm.
              */
8446         *tl++ = newnfs_true;
8447         *tl++ = 0;
8448         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8449         NFSWRITEGETATTR_ATTRBIT(&attrbits);
8450         nfsrv_putattrbit(nd, &attrbits);
8451         error = nfscl_request(nd, invp, p, cred, NULL);
8452         if (error != 0)
8453                 return (error);
8454         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8455                 /* Get the input file's attributes. */
8456                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
                     /* The second word is the Getattr status. */
8457                 if (*(tl + 1) == 0) {
8458                         error = nfsm_loadattr(nd, innap);
8459                         if (error != 0)
8460                                 goto nfsmout;
8461                         *inattrflagp = 1;
8462                 } else
8463                         nd->nd_flag |= ND_NOMOREDATA;
8464         }
8465         /* Skip over return stat for PutFH. */
8466         if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8467                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8468                 if (*++tl != 0)
8469                         nd->nd_flag |= ND_NOMOREDATA;
8470         }
8471         /* Skip over return stat for Copy. */
8472         if ((nd->nd_flag & ND_NOMOREDATA) == 0)
8473                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8474         if (nd->nd_repstat == 0) {
8475                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8476                 if (*tl != 0) {
8477                         /* There should be no callback ids. */
8478                         error = NFSERR_BADXDR;
8479                         goto nfsmout;
8480                 }
                     /* Length, commit level, write verifier and two trailing words. */
8481                 NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED +
8482                     NFSX_VERF);
8483                 len = fxdr_hyper(tl); tl += 2;
8484                 *commitp = fxdr_unsigned(int, *tl++);
                     /*
                      * Under the mount lock: save the first write verifier
                      * seen, or, when it differs from the saved one, replace
                      * it and report NFSERR_STALEWRITEVERF.
                      */
8485                 NFSLOCKMNT(nmp);
8486                 if (!NFSHASWRITEVERF(nmp)) {
8487                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8488                         NFSSETWRITEVERF(nmp);
8489                 } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
8490                         NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8491                         nd->nd_repstat = NFSERR_STALEWRITEVERF;
8492                 }
8493                 NFSUNLOCKMNT(nmp);
8494                 tl += (NFSX_VERF / NFSX_UNSIGNED);
                     /* *++tl steps past the first trailing word and tests the second. */
8495                 if (nd->nd_repstat == 0 && *++tl != newnfs_true)
8496                         /* Must be a synchronous copy. */
8497                         nd->nd_repstat = NFSERR_NOTSUPP;
                     /* The two words before the output file's attributes are not examined. */
8498                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8499                 error = nfsm_loadattr(nd, outnap);
8500                 if (error == 0)
8501                         *outattrflagp = NFS_LATTR_NOSHRINK;
8502                 if (nd->nd_repstat == 0)
8503                         *lenp = len;
8504         } else if (nd->nd_repstat == NFSERR_OFFLOADNOREQS) {
8505                 /*
8506                  * For the case where consecutive is not supported, but
8507                  * synchronous is supported, we can try consecutive == false
8508                  * by returning this error.  Otherwise, return NFSERR_NOTSUPP,
8509                  * since Copy cannot be done.
8510                  */
8511                 if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8512                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
                             /* The second word of the pair is the one tested. */
8513                         if (!consecutive || *++tl == newnfs_false)
8514                                 nd->nd_repstat = NFSERR_NOTSUPP;
8515                 } else
8516                         nd->nd_repstat = NFSERR_BADXDR;
8517         }
8518         if (error == 0)
8519                 error = nd->nd_repstat;
8520 nfsmout:
8521         m_freem(nd->nd_mrep);
8522         return (error);
8523 }
8524
8525 /*
8526  * Seek operation.
8527  */
8528 int
8529 nfsrpc_seek(vnode_t vp, off_t *offp, bool *eofp, int content,
8530     struct ucred *cred, struct nfsvattr *nap, int *attrflagp)
8531 {
8532         int error, expireret = 0, retrycnt;
8533         u_int32_t clidrev = 0;
8534         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
8535         struct nfsnode *np = VTONFS(vp);
8536         struct nfsfh *nfhp = NULL;
8537         nfsv4stateid_t stateid;
8538         void *lckp;
8539
8540         if (nmp->nm_clp != NULL)
8541                 clidrev = nmp->nm_clp->nfsc_clientidrev;
8542         nfhp = np->n_fhp;
8543         retrycnt = 0;
8544         do {
8545                 lckp = NULL;
8546                 nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
8547                     NFSV4OPEN_ACCESSREAD, 0, cred, curthread, &stateid, &lckp);
8548                 error = nfsrpc_seekrpc(vp, offp, &stateid, eofp, content,
8549                     nap, attrflagp, cred);
8550                 if (error == NFSERR_STALESTATEID)
8551                         nfscl_initiate_recovery(nmp->nm_clp);
8552                 if (lckp != NULL)
8553                         nfscl_lockderef(lckp);
8554                 if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8555                     error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8556                     error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8557                         (void) nfs_catnap(PZERO, error, "nfs_seek");
8558                 } else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
8559                     error == NFSERR_BADSTATEID)) && clidrev != 0) {
8560                         expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8561                             curthread);
8562                 } else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
8563                         error = EIO;
8564                 }
8565                 retrycnt++;
8566         } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8567             error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8568             error == NFSERR_BADSESSION ||
8569             (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8570             ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8571              expireret == 0 && clidrev != 0 && retrycnt < 4) ||
8572             (error == NFSERR_OPENMODE && retrycnt < 4));
8573         if (error && retrycnt >= 4)
8574                 error = EIO;
8575         return (error);
8576 }
8577
8578 /*
8579  * The seek RPC.
8580  */
8581 static int
8582 nfsrpc_seekrpc(vnode_t vp, off_t *offp, nfsv4stateid_t *stateidp, bool *eofp,
8583     int content, struct nfsvattr *nap, int *attrflagp, struct ucred *cred)
8584 {
8585         uint32_t *tl;
8586         int error;
8587         struct nfsrv_descript nfsd;
8588         struct nfsrv_descript *nd = &nfsd;
8589         nfsattrbit_t attrbits;
8590
8591         *attrflagp = 0;
8592         NFSCL_REQSTART(nd, NFSPROC_SEEK, vp, cred);
8593         nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
8594         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8595         txdr_hyper(*offp, tl); tl += 2;
8596         *tl++ = txdr_unsigned(content);
8597         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8598         NFSGETATTR_ATTRBIT(&attrbits);
8599         nfsrv_putattrbit(nd, &attrbits);
8600         error = nfscl_request(nd, vp, curthread, cred, NULL);
8601         if (error != 0)
8602                 return (error);
8603         if (nd->nd_repstat == 0) {
8604                 NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER);
8605                 if (*tl++ == newnfs_true)
8606                         *eofp = true;
8607                 else
8608                         *eofp = false;
8609                 *offp = fxdr_hyper(tl);
8610                 /* Just skip over Getattr op status. */
8611                 error = nfsm_loadattr(nd, nap);
8612                 if (error == 0)
8613                         *attrflagp = 1;
8614         }
8615         error = nd->nd_repstat;
8616 nfsmout:
8617         m_freem(nd->nd_mrep);
8618         return (error);
8619 }
8620
8621 /*
8622  * The getextattr RPC.
8623  */
8624 int
8625 nfsrpc_getextattr(vnode_t vp, const char *name, struct uio *uiop, ssize_t *lenp,
8626     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8627 {
8628         uint32_t *tl;
8629         int error;
8630         struct nfsrv_descript nfsd;
8631         struct nfsrv_descript *nd = &nfsd;
8632         nfsattrbit_t attrbits;
8633         uint32_t len, len2;
8634
8635         *attrflagp = 0;
8636         NFSCL_REQSTART(nd, NFSPROC_GETEXTATTR, vp, cred);
8637         nfsm_strtom(nd, name, strlen(name));
8638         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8639         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8640         NFSGETATTR_ATTRBIT(&attrbits);
8641         nfsrv_putattrbit(nd, &attrbits);
8642         error = nfscl_request(nd, vp, p, cred, NULL);
8643         if (error != 0)
8644                 return (error);
8645         if (nd->nd_repstat == 0) {
8646                 NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8647                 len = fxdr_unsigned(uint32_t, *tl);
8648                 /* Sanity check lengths. */
8649                 if (uiop != NULL && len > 0 && len <= IOSIZE_MAX &&
8650                     uiop->uio_resid <= UINT32_MAX) {
8651                         len2 = uiop->uio_resid;
8652                         if (len2 >= len)
8653                                 error = nfsm_mbufuio(nd, uiop, len);
8654                         else {
8655                                 error = nfsm_mbufuio(nd, uiop, len2);
8656                                 if (error == 0) {
8657                                         /*
8658                                          * nfsm_mbufuio() advances to a multiple
8659                                          * of 4, so round up len2 as well.  Then
8660                                          * we need to advance over the rest of
8661                                          * the data, rounding up the remaining
8662                                          * length.
8663                                          */
8664                                         len2 = NFSM_RNDUP(len2);
8665                                         len2 = NFSM_RNDUP(len - len2);
8666                                         if (len2 > 0)
8667                                                 error = nfsm_advance(nd, len2,
8668                                                     -1);
8669                                 }
8670                         }
8671                 } else if (uiop == NULL && len > 0) {
8672                         /* Just wants the length and not the data. */
8673                         error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8674                 } else if (len > 0)
8675                         error = ENOATTR;
8676                 if (error != 0)
8677                         goto nfsmout;
8678                 *lenp = len;
8679                 /* Just skip over Getattr op status. */
8680                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8681                 error = nfsm_loadattr(nd, nap);
8682                 if (error == 0)
8683                         *attrflagp = 1;
8684         }
8685         if (error == 0)
8686                 error = nd->nd_repstat;
8687 nfsmout:
8688         m_freem(nd->nd_mrep);
8689         return (error);
8690 }
8691
8692 /*
8693  * The setextattr RPC.
8694  */
8695 int
8696 nfsrpc_setextattr(vnode_t vp, const char *name, struct uio *uiop,
8697     struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8698 {
8699         uint32_t *tl;
8700         int error;
8701         struct nfsrv_descript nfsd;
8702         struct nfsrv_descript *nd = &nfsd;
8703         nfsattrbit_t attrbits;
8704
8705         *attrflagp = 0;
8706         NFSCL_REQSTART(nd, NFSPROC_SETEXTATTR, vp, cred);
8707         if (uiop->uio_resid > nd->nd_maxreq) {
8708                 /* nd_maxreq is set by NFSCL_REQSTART(). */
8709                 m_freem(nd->nd_mreq);
8710                 return (EINVAL);
8711         }
8712         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8713         *tl = txdr_unsigned(NFSV4SXATTR_EITHER);
8714         nfsm_strtom(nd, name, strlen(name));
8715         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8716         *tl = txdr_unsigned(uiop->uio_resid);
8717         error = nfsm_uiombuf(nd, uiop, uiop->uio_resid);
8718         if (error != 0) {
8719                 m_freem(nd->nd_mreq);
8720                 return (error);
8721         }
8722         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8723         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8724         NFSGETATTR_ATTRBIT(&attrbits);
8725         nfsrv_putattrbit(nd, &attrbits);
8726         error = nfscl_request(nd, vp, p, cred, NULL);
8727         if (error != 0)
8728                 return (error);
8729         if (nd->nd_repstat == 0) {
8730                 /* Just skip over the reply and Getattr op status. */
8731                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8732                     NFSX_UNSIGNED);
8733                 error = nfsm_loadattr(nd, nap);
8734                 if (error == 0)
8735                         *attrflagp = 1;
8736         }
8737         if (error == 0)
8738                 error = nd->nd_repstat;
8739 nfsmout:
8740         m_freem(nd->nd_mrep);
8741         return (error);
8742 }
8743
8744 /*
8745  * The removeextattr RPC.
8746  */
8747 int
8748 nfsrpc_rmextattr(vnode_t vp, const char *name, struct nfsvattr *nap,
8749     int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8750 {
8751         uint32_t *tl;
8752         int error;
8753         struct nfsrv_descript nfsd;
8754         struct nfsrv_descript *nd = &nfsd;
8755         nfsattrbit_t attrbits;
8756
8757         *attrflagp = 0;
8758         NFSCL_REQSTART(nd, NFSPROC_RMEXTATTR, vp, cred);
8759         nfsm_strtom(nd, name, strlen(name));
8760         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8761         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8762         NFSGETATTR_ATTRBIT(&attrbits);
8763         nfsrv_putattrbit(nd, &attrbits);
8764         error = nfscl_request(nd, vp, p, cred, NULL);
8765         if (error != 0)
8766                 return (error);
8767         if (nd->nd_repstat == 0) {
8768                 /* Just skip over the reply and Getattr op status. */
8769                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
8770                     NFSX_UNSIGNED);
8771                 error = nfsm_loadattr(nd, nap);
8772                 if (error == 0)
8773                         *attrflagp = 1;
8774         }
8775         if (error == 0)
8776                 error = nd->nd_repstat;
8777 nfsmout:
8778         m_freem(nd->nd_mrep);
8779         return (error);
8780 }
8781
8782 /*
8783  * The listextattr RPC.
8784  */
8785 int
8786 nfsrpc_listextattr(vnode_t vp, uint64_t *cookiep, struct uio *uiop,
8787     size_t *lenp, bool *eofp, struct nfsvattr *nap, int *attrflagp,
8788     struct ucred *cred, NFSPROC_T *p)
8789 {
8790         uint32_t *tl;
8791         int cnt, error, i, len;
8792         struct nfsrv_descript nfsd;
8793         struct nfsrv_descript *nd = &nfsd;
8794         nfsattrbit_t attrbits;
8795         u_char c;
8796
8797         *attrflagp = 0;
8798         NFSCL_REQSTART(nd, NFSPROC_LISTEXTATTR, vp, cred);
8799         NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8800         txdr_hyper(*cookiep, tl); tl += 2;
8801         *tl++ = txdr_unsigned(*lenp);
8802         *tl = txdr_unsigned(NFSV4OP_GETATTR);
8803         NFSGETATTR_ATTRBIT(&attrbits);
8804         nfsrv_putattrbit(nd, &attrbits);
8805         error = nfscl_request(nd, vp, p, cred, NULL);
8806         if (error != 0)
8807                 return (error);
8808         *eofp = true;
8809         *lenp = 0;
8810         if (nd->nd_repstat == 0) {
8811                 NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
8812                 *cookiep = fxdr_hyper(tl); tl += 2;
8813                 cnt = fxdr_unsigned(int, *tl);
8814                 if (cnt < 0) {
8815                         error = EBADRPC;
8816                         goto nfsmout;
8817                 }
8818                 for (i = 0; i < cnt; i++) {
8819                         NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8820                         len = fxdr_unsigned(int, *tl);
8821                         if (len <= 0 || len > EXTATTR_MAXNAMELEN) {
8822                                 error = EBADRPC;
8823                                 goto nfsmout;
8824                         }
8825                         if (uiop == NULL)
8826                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8827                         else if (uiop->uio_resid >= len + 1) {
8828                                 c = len;
8829                                 error = uiomove(&c, sizeof(c), uiop);
8830                                 if (error == 0)
8831                                         error = nfsm_mbufuio(nd, uiop, len);
8832                         } else {
8833                                 error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
8834                                 *eofp = false;
8835                         }
8836                         if (error != 0)
8837                                 goto nfsmout;
8838                         *lenp += (len + 1);
8839                 }
8840                 /* Get the eof and skip over the Getattr op status. */
8841                 NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
8842                 /*
8843                  * *eofp is set false above, because it wasn't able to copy
8844                  * all of the reply.
8845                  */
8846                 if (*eofp && *tl == 0)
8847                         *eofp = false;
8848                 error = nfsm_loadattr(nd, nap);
8849                 if (error == 0)
8850                         *attrflagp = 1;
8851         }
8852         if (error == 0)
8853                 error = nd->nd_repstat;
8854 nfsmout:
8855         m_freem(nd->nd_mrep);
8856         return (error);
8857 }
8858
8859 /*
8860  * Split an mbuf list.  For non-M_EXTPG mbufs, just use m_split().
8861  */
8862 static struct mbuf *
8863 nfsm_split(struct mbuf *mp, uint64_t xfer)
8864 {
8865         struct mbuf *m, *m2;
8866         vm_page_t pg;
8867         int i, j, left, pgno, plen, trim;
8868         char *cp, *cp2;
8869
8870         if ((mp->m_flags & M_EXTPG) == 0) {
8871                 m = m_split(mp, xfer, M_WAITOK);
8872                 return (m);
8873         }
8874
8875         /* Find the correct mbuf to split at. */
8876         for (m = mp; m != NULL && xfer > m->m_len; m = m->m_next)
8877                 xfer -= m->m_len;
8878         if (m == NULL)
8879                 return (NULL);
8880
8881         /* If xfer == m->m_len, we can just split the mbuf list. */
8882         if (xfer == m->m_len) {
8883                 m2 = m->m_next;
8884                 m->m_next = NULL;
8885                 return (m2);
8886         }
8887
8888         /* Find the page to split at. */
8889         pgno = 0;
8890         left = xfer;
8891         do {
8892                 if (pgno == 0)
8893                         plen = m_epg_pagelen(m, 0, m->m_epg_1st_off);
8894                 else
8895                         plen = m_epg_pagelen(m, pgno, 0);
8896                 if (left <= plen)
8897                         break;
8898                 left -= plen;
8899                 pgno++;
8900         } while (pgno < m->m_epg_npgs);
8901         if (pgno == m->m_epg_npgs)
8902                 panic("nfsm_split: eroneous ext_pgs mbuf");
8903
8904         m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs);
8905         m2->m_epg_flags |= EPG_FLAG_ANON;
8906
8907         /*
8908          * If left < plen, allocate a new page for the new mbuf
8909          * and copy the data after left in the page to this new
8910          * page.
8911          */
8912         if (left < plen) {
8913                 pg = vm_page_alloc_noobj(VM_ALLOC_WAITOK | VM_ALLOC_NODUMP |
8914                     VM_ALLOC_WIRED);
8915                 m2->m_epg_pa[0] = VM_PAGE_TO_PHYS(pg);
8916                 m2->m_epg_npgs = 1;
8917
8918                 /* Copy the data after left to the new page. */
8919                 trim = plen - left;
8920                 cp = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
8921                 if (pgno == 0)
8922                         cp += m->m_epg_1st_off;
8923                 cp += left;
8924                 cp2 = (char *)(void *)PHYS_TO_DMAP(m2->m_epg_pa[0]);
8925                 if (pgno == m->m_epg_npgs - 1)
8926                         m2->m_epg_last_len = trim;
8927                 else {
8928                         cp2 += PAGE_SIZE - trim;
8929                         m2->m_epg_1st_off = PAGE_SIZE - trim;
8930                         m2->m_epg_last_len = m->m_epg_last_len;
8931                 }
8932                 memcpy(cp2, cp, trim);
8933                 m2->m_len = trim;
8934         } else {
8935                 m2->m_len = 0;
8936                 m2->m_epg_last_len = m->m_epg_last_len;
8937         }
8938
8939         /* Move the pages beyond pgno to the new mbuf. */
8940         for (i = pgno + 1, j = m2->m_epg_npgs; i < m->m_epg_npgs; i++, j++) {
8941                 m2->m_epg_pa[j] = m->m_epg_pa[i];
8942                 /* Never moves page 0. */
8943                 m2->m_len += m_epg_pagelen(m, i, 0);
8944         }
8945         m2->m_epg_npgs = j;
8946         m->m_epg_npgs = pgno + 1;
8947         m->m_epg_last_len = left;
8948         m->m_len = xfer;
8949
8950         m2->m_next = m->m_next;
8951         m->m_next = NULL;
8952         return (m2);
8953 }
8954
8955 /*
8956  * Do the NFSv4.1 Bind Connection to Session.
8957  * Called from the reconnect layer of the krpc (sys/rpc/clnt_rc.c).
8958  */
8959 void
8960 nfsrpc_bindconnsess(CLIENT *cl, void *arg, struct ucred *cr)
8961 {
8962         struct nfscl_reconarg *rcp = (struct nfscl_reconarg *)arg;
8963         uint32_t res, *tl;
8964         struct nfsrv_descript nfsd;
8965         struct nfsrv_descript *nd = &nfsd;
8966         struct rpc_callextra ext;
8967         struct timeval utimeout;
8968         enum clnt_stat stat;
8969         int error;
8970
8971         nfscl_reqstart(nd, NFSPROC_BINDCONNTOSESS, NULL, NULL, 0, NULL, NULL,
8972             NFS_VER4, rcp->minorvers, NULL);
8973         NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED);
8974         memcpy(tl, rcp->sessionid, NFSX_V4SESSIONID);
8975         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
8976         *tl++ = txdr_unsigned(NFSCDFC4_FORE_OR_BOTH);
8977         *tl = newnfs_false;
8978
8979         memset(&ext, 0, sizeof(ext));
8980         utimeout.tv_sec = 30;
8981         utimeout.tv_usec = 0;
8982         ext.rc_auth = authunix_create(cr);
8983         nd->nd_mrep = NULL;
8984         stat = CLNT_CALL_MBUF(cl, &ext, NFSV4PROC_COMPOUND, nd->nd_mreq,
8985             &nd->nd_mrep, utimeout);
8986         AUTH_DESTROY(ext.rc_auth);
8987         if (stat != RPC_SUCCESS) {
8988                 printf("nfsrpc_bindconnsess: call failed stat=%d\n", stat);
8989                 return;
8990         }
8991         if (nd->nd_mrep == NULL) {
8992                 printf("nfsrpc_bindconnsess: no reply args\n");
8993                 return;
8994         }
8995         error = 0;
8996         newnfs_realign(&nd->nd_mrep, M_WAITOK);
8997         nd->nd_md = nd->nd_mrep;
8998         nd->nd_dpos = mtod(nd->nd_md, char *);
8999         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
9000         nd->nd_repstat = fxdr_unsigned(uint32_t, *tl++);
9001         if (nd->nd_repstat == NFSERR_OK) {
9002                 res = fxdr_unsigned(uint32_t, *tl);
9003                 if (res > 0 && (error = nfsm_advance(nd, NFSM_RNDUP(res),
9004                     -1)) != 0)
9005                         goto nfsmout;
9006                 NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
9007                     4 * NFSX_UNSIGNED);
9008                 tl += 3;
9009                 if (!NFSBCMP(tl, rcp->sessionid, NFSX_V4SESSIONID)) {
9010                         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
9011                         res = fxdr_unsigned(uint32_t, *tl);
9012                         if (res != NFSCDFS4_BOTH)
9013                                 printf("nfsrpc_bindconnsess: did not "
9014                                     "return FS4_BOTH\n");
9015                 } else
9016                         printf("nfsrpc_bindconnsess: not same "
9017                             "sessionid\n");
9018         } else if (nd->nd_repstat != NFSERR_BADSESSION)
9019                 printf("nfsrpc_bindconnsess: returned %d\n", nd->nd_repstat);
9020 nfsmout:
9021         if (error != 0)
9022                 printf("nfsrpc_bindconnsess: reply bad xdr\n");
9023         m_freem(nd->nd_mrep);
9024 }
9025
9026 /*
9027  * Do roughly what nfs_statfs() does for NFSv4, but when called with a shared
9028  * locked vnode.
9029  */
9030 static void
9031 nfscl_statfs(struct vnode *vp, struct ucred *cred, NFSPROC_T *td)
9032 {
9033         struct nfsvattr nfsva;
9034         struct nfsfsinfo fs;
9035         struct nfsstatfs sb;
9036         struct mount *mp;
9037         struct nfsmount *nmp;
9038         uint32_t lease;
9039         int attrflag, error;
9040
9041         mp = vp->v_mount;
9042         nmp = VFSTONFS(mp);
9043         error = nfsrpc_statfs(vp, &sb, &fs, &lease, cred, td, &nfsva,
9044             &attrflag, NULL);
9045         if (attrflag != 0)
9046                 nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
9047         if (error == 0) {
9048                 NFSLOCKCLSTATE();
9049                 if (nmp->nm_clp != NULL)
9050                         nmp->nm_clp->nfsc_renew = NFSCL_RENEW(lease);
9051                 NFSUNLOCKCLSTATE();
9052                 mtx_lock(&nmp->nm_mtx);
9053                 nfscl_loadfsinfo(nmp, &fs);
9054                 nfscl_loadsbinfo(nmp, &sb, &mp->mnt_stat);
9055                 mp->mnt_stat.f_iosize = newnfs_iosize(nmp);
9056                 mtx_unlock(&nmp->nm_mtx);
9057         }
9058 }