]> CyberLeo.Net >> Repos - FreeBSD/releng/7.2.git/blob - sys/rpc/rpcclnt.c
Create releng/7.2 from stable/7 in preparation for 7.2-RELEASE.
[FreeBSD/releng/7.2.git] / sys / rpc / rpcclnt.c
1 /* $FreeBSD$ */
2 /* $Id: rpcclnt.c,v 1.9 2003/11/05 14:59:03 rees Exp $ */
3
4 /*-
5  * copyright (c) 2003
6  * the regents of the university of michigan
7  * all rights reserved
8  * 
9  * permission is granted to use, copy, create derivative works and redistribute
10  * this software and such derivative works for any purpose, so long as the name
11  * of the university of michigan is not used in any advertising or publicity
12  * pertaining to the use or distribution of this software without specific,
13  * written prior authorization.  if the above copyright notice or any other
14  * identification of the university of michigan is included in any copy of any
15  * portion of this software, then the disclaimer below must also be included.
16  * 
17  * this software is provided as is, without representation from the university
18  * of michigan as to its fitness for any purpose, and without warranty by the
19  * university of michigan of any kind, either express or implied, including
20  * without limitation the implied warranties of merchantability and fitness for
21  * a particular purpose. the regents of the university of michigan shall not be
22  * liable for any damages, including special, indirect, incidental, or
23  * consequential damages, with respect to any claim arising out of or in
24  * connection with the use of the software, even if it has been or is hereafter
25  * advised of the possibility of such damages.
26  */
27
28 /*-
29  * Copyright (c) 1989, 1991, 1993, 1995 The Regents of the University of
30  * California.  All rights reserved.
31  * 
32  * This code is derived from software contributed to Berkeley by Rick Macklem at
33  * The University of Guelph.
34  * 
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions are
37  * met: 1. Redistributions of source code must retain the above copyright
38  * notice, this list of conditions and the following disclaimer. 2.
39  * Redistributions in binary form must reproduce the above copyright notice,
40  * this list of conditions and the following disclaimer in the documentation
41  * and/or other materials provided with the distribution. 3. All advertising
42  * materials mentioning features or use of this software must display the
43  * following acknowledgement: This product includes software developed by the
44  * University of California, Berkeley and its contributors. 4. Neither the
45  * name of the University nor the names of its contributors may be used to
46  * endorse or promote products derived from this software without specific
47  * prior written permission.
48  * 
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
50  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
51  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
52  * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
53  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
55  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
56  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  * 
61  * @(#)nfs_socket.c     8.5 (Berkeley) 3/30/95
62  */
63
64 /* XXX: kill ugly debug strings */
65 /* XXX: get rid of proct, as it is not even being used... (or keep it so v{2,3}
66  *      can run, but clean it up! */
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/proc.h>
71 #include <sys/mount.h>
72 #include <sys/kernel.h>
73 #include <sys/mbuf.h>
74 #include <sys/syslog.h>
75 #include <sys/malloc.h>
76 #include <sys/uio.h>
77 #include <sys/lock.h>
78 #include <sys/signalvar.h>
79 #include <sys/sysent.h>
80 #include <sys/syscall.h>
81 #include <sys/sysctl.h>
82
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/mutex.h>
88
89 #include <netinet/in.h>
90 #include <netinet/tcp.h>
91
92 #include <nfs/rpcv2.h>
93
94 #include <rpc/rpcm_subs.h>
95 #include <rpc/rpcclnt.h>
96
97 /* memory management */
98 #ifdef __OpenBSD__
99 struct pool     rpctask_pool;
100 struct pool     rpcclnt_pool;
101 #define RPCTASKPOOL_LWM 10
102 #define RPCTASKPOOL_HWM 40
103 #else
104 static          MALLOC_DEFINE(M_RPC, "rpcclnt", "rpc state");
105 #endif
106
/*
 * Log the value about to be returned through RPCDEBUG, then return it from
 * the enclosing function.  The argument is evaluated exactly once, so an
 * expression with side effects (e.g. a function call) is safe to pass.
 */
#define RPC_RETURN(X) do {						\
	__typeof__(X) rpc_return_value_ = (X);				\
	RPCDEBUG("returning %d", rpc_return_value_);			\
	return (rpc_return_value_);					\
} while (0)
108
/*
 * Estimate rto for an rpc sent via an unreliable datagram.  Use the mean
 * and mean deviation of rtt for the appropriate type of rpc for the frequent
 * rpcs and a default for the others.  The justification for doing "other"
 * this way is that these rpcs happen so infrequently that timer est. would
 * probably be stale.  Also, since many of these rpcs are non-idempotent, a
 * conservative timeout is desired.
 *
 *	getattr, lookup - A+2D
 *	read, write     - A+4D
 *	other           - rc_timeo
 *
 * n is the rpcclnt; t is the timer class.  t == 0 selects rc_timeo, t in
 * 1..2 the first estimate and any larger t the second.  Both estimates
 * read slot (t) - 1 of rc_srtt[] and rc_sdrtt[].  t is parenthesized
 * everywhere so that an expression argument indexes the intended slot.
 */
#define RPC_RTO(n, t) \
	((t) == 0 ? (n)->rc_timeo : \
	 ((t) < 3 ? \
	  (((((n)->rc_srtt[(t) - 1] + 3) >> 2) + (n)->rc_sdrtt[(t) - 1] + 1) >> 1) : \
	  ((((n)->rc_srtt[(t) - 1] + 7) >> 3) + (n)->rc_sdrtt[(t) - 1] + 1)))
123
/*
 * Accessors for the rc_srtt[] / rc_sdrtt[] slot that belongs to task r's
 * procedure.  The slot is rpcclnt_proct(s, r->r_procnum) - 1; both macros
 * expand to lvalues.
 * NOTE(review): a timer class of 0 from rpcclnt_proct() would index out of
 * bounds -- presumably callers check for that first; confirm.
 */
#define RPC_SRTT(s, r) \
	((r)->r_rpcclnt->rc_srtt[rpcclnt_proct((s), (r)->r_procnum) - 1])

#define RPC_SDRTT(s, r) \
	((r)->r_rpcclnt->rc_sdrtt[rpcclnt_proct((s), (r)->r_procnum) - 1])
129
130
131 /*
132  * There is a congestion window for outstanding rpcs maintained per mount
133  * point. The cwnd size is adjusted in roughly the way that: Van Jacobson,
134  * Congestion avoidance and Control, In "Proceedings of SIGCOMM '88". ACM,
135  * August 1988. describes for TCP. The cwnd size is chopped in half on a
136  * retransmit timeout and incremented by 1/cwnd when each rpc reply is
137  * received and a full cwnd of rpcs is in progress. (The sent count and cwnd
138  * are scaled for integer arith.) Variants of "slow start" were tried and
139  * were found to be too much of a performance hit (ave. rtt 3 times larger),
140  * I suspect due to the large rtt that nfs rpcs have.
141  */
/* Scale factor applied to the cwnd and sent counts for integer arithmetic. */
#define RPC_CWNDSCALE	256
/* Upper bound on the scaled congestion window (32 rpcs). */
#define RPC_MAXCWND	(RPC_CWNDSCALE * 32)

/*
 * Powers of two from 2 through 256.
 * NOTE(review): presumably the retransmit backoff multipliers consumed by
 * the timer code -- confirm against rpcclnt_timer().
 */
static const int rpcclnt_backoff[8] = {
	2, 4, 8, 16,
	32, 64, 128, 256,
};
145
/*
 * XXX ugly debug strings.
 *
 * Human-readable messages for RPC failures, kept in three tables.  The
 * first entry of the "accepted" and "auth" tables is deliberately empty.
 */
#define RPC_ERRSTR_ACCEPTED_SIZE 6
#define RPC_ERRSTR_AUTH_SIZE 6

/* Messages for calls that were accepted but did not succeed. */
char *rpc_errstr_accepted[RPC_ERRSTR_ACCEPTED_SIZE] = {
	/* 0 */ "",	/* no good message... */
	/* 1 */ "remote server hasn't exported program.",
	/* 2 */ "remote server can't support version number.",
	/* 3 */ "program can't support procedure.",
	/* 4 */ "procedure can't decode params.",
	/* 5 */ "remote error.  remote side memory allocation failure?"
};

/* Messages for calls that were denied outright. */
char *rpc_errstr_denied[2] = {
	/* 0 */ "remote server doesnt support rpc version 2!",
	/* 1 */ "remote server authentication error."
};

/* Messages for authentication failures. */
char *rpc_errstr_auth[RPC_ERRSTR_AUTH_SIZE] = {
	/* 0 */ "",
	/* 1 */ "auth error: bad credential (seal broken).",
	/* 2 */ "auth error: client must begin new session.",
	/* 3 */ "auth error: bad verifier (seal broken).",
	/* 4 */ "auth error: verifier expired or replayed.",
	/* 5 */ "auth error: rejected for security reasons.",
};
171
172 /*
173  * Static data, mostly RPC constants in XDR form
174  */
175 static u_int32_t rpc_reply, rpc_call, rpc_vers;
176
177 /*
178  * rpc_msgdenied, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted,
179  * rpc_autherr, rpc_auth_kerb;
180  */
181
182 static u_int32_t rpcclnt_xid = 0;
183 static u_int32_t rpcclnt_xid_touched = 0;
184 struct rpcstats rpcstats;
185 int      rpcclnt_ticks;
186
187 SYSCTL_NODE(_kern, OID_AUTO, rpc, CTLFLAG_RD, 0, "RPC Subsystem");
188
189 SYSCTL_UINT(_kern_rpc, OID_AUTO, retries, CTLFLAG_RD, &rpcstats.rpcretries, 0, "retries");
190 SYSCTL_UINT(_kern_rpc, OID_AUTO, request, CTLFLAG_RD, &rpcstats.rpcrequests, 0, "request");
191 SYSCTL_UINT(_kern_rpc, OID_AUTO, timeouts, CTLFLAG_RD, &rpcstats.rpctimeouts, 0, "timeouts");
192 SYSCTL_UINT(_kern_rpc, OID_AUTO, unexpected, CTLFLAG_RD, &rpcstats.rpcunexpected, 0, "unexpected");
193 SYSCTL_UINT(_kern_rpc, OID_AUTO, invalid, CTLFLAG_RD, &rpcstats.rpcinvalid, 0, "invalid");
194
195
196 #ifdef RPCCLNT_DEBUG
197 int             rpcdebugon = 0;
198 SYSCTL_UINT(_kern_rpc, OID_AUTO, debug_on, CTLFLAG_RW, &rpcdebugon, 0, "RPC Debug messages");
199 #endif
200
201 /*
202  * Queue head for rpctask's
203  */
204 static 
205 TAILQ_HEAD(, rpctask) rpctask_q;
206 struct callout  rpcclnt_callout;
207
208 #ifdef __OpenBSD__
209 static int             rpcclnt_send(struct socket *, struct mbuf *, struct mbuf *, struct rpctask *);
210 static int             rpcclnt_receive(struct rpctask *, struct mbuf **, struct mbuf **, RPC_EXEC_CTX);
211 #else
212 static int             rpcclnt_send(struct socket *, struct sockaddr *, struct mbuf *, struct rpctask *);
213 static int             rpcclnt_receive(struct rpctask *, struct sockaddr **, struct mbuf **, RPC_EXEC_CTX);
214 #endif
215
216 static int             rpcclnt_msg(RPC_EXEC_CTX, const char *, char *);
217
218 static int             rpcclnt_reply(struct rpctask *, RPC_EXEC_CTX);
219 static void            rpcclnt_timer(void *);
220 static int             rpcclnt_sndlock(int *, struct rpctask *);
221 static void            rpcclnt_sndunlock(int *);
222 static int             rpcclnt_rcvlock(struct rpctask *);
223 static void            rpcclnt_rcvunlock(int *);
224 #if 0
225 void            rpcclnt_realign(struct mbuf *, int);
226 #else
227 static void     rpcclnt_realign(struct mbuf **, int);
228 #endif
229
230 static struct mbuf    *rpcclnt_buildheader(struct rpcclnt *, int, struct mbuf *, u_int32_t, int *, struct mbuf **, struct ucred *);
231 static int             rpcm_disct(struct mbuf **, caddr_t *, int, int, caddr_t *);
232 static u_int32_t       rpcclnt_proct(struct rpcclnt *, u_int32_t);
233 static int             rpc_adv(struct mbuf **, caddr_t *, int, int);
234 static void     rpcclnt_softterm(struct rpctask * task);
235
236 static int rpcauth_buildheader(struct rpc_auth * auth, struct ucred *, struct mbuf **, caddr_t *);
237
238 void
239 rpcclnt_init(void)
240 {
241 #ifdef __OpenBSD__
242         static struct timeout rpcclnt_timer_to;
243 #endif
244
245         rpcclnt_ticks = (hz * RPC_TICKINTVL + 500) / 1000;
246         if (rpcclnt_ticks < 1)
247                 rpcclnt_ticks = 1;
248         rpcstats.rpcretries = 0;
249         rpcstats.rpcrequests = 0;
250         rpcstats.rpctimeouts = 0;
251         rpcstats.rpcunexpected = 0;
252         rpcstats.rpcinvalid = 0;
253
254         /*
255          * rpc constants how about actually using more than one of these!
256          */
257
258         rpc_reply = txdr_unsigned(RPC_REPLY);
259         rpc_vers = txdr_unsigned(RPC_VER2);
260         rpc_call = txdr_unsigned(RPC_CALL);
261 #if 0
262         rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
263         rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
264         rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
265         rpc_autherr = txdr_unsigned(RPC_AUTHERR);
266         rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
267         rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
268 #endif
269
270         /* initialize rpctask queue */
271         TAILQ_INIT(&rpctask_q);
272
273 #ifdef __OpenBSD__
274         /* initialize pools */
275         pool_init(&rpctask_pool, sizeof(struct rpctask), 0, 0, RPCTASKPOOL_LWM,
276                   "rpctask_p", NULL);
277         pool_setlowat(&rpctask_pool, RPCTASKPOOL_LWM);
278         pool_sethiwat(&rpctask_pool, RPCTASKPOOL_HWM);
279
280         pool_init(&rpcclnt_pool, sizeof(struct rpcclnt), 0, 0, 1, "rpcclnt_p", NULL);
281
282         /* initialize timers */
283         timeout_set(&rpcclnt_timer_to, rpcclnt_timer, &rpcclnt_timer_to);
284         rpcclnt_timer(&rpcclnt_timer_to);
285 #else /* !__OpenBSD__ */
286         callout_init(&rpcclnt_callout, 0);
287 #endif /* !__OpenBSD__ */
288
289         RPCDEBUG("rpc initialed");
290
291         return;
292 }
293
294 void
295 rpcclnt_uninit(void)
296 {
297         RPCDEBUG("uninit");
298         /* XXX delete sysctl variables? */
299         callout_stop(&rpcclnt_callout);
300 }
301
302 int
303 rpcclnt_setup(clnt, program, addr, sotype, soproto, auth, max_read_size, max_write_size, flags)
304     struct rpcclnt * clnt;
305     struct rpc_program * program;
306     struct sockaddr * addr;
307     int sotype;
308     int soproto;
309     struct rpc_auth * auth;
310     int max_read_size;
311     int max_write_size;
312     int flags;
313 {
314         if (clnt == NULL || program == NULL || addr == NULL || auth == NULL)
315           RPC_RETURN (EFAULT);
316
317         if (program->prog_name == NULL)
318           RPC_RETURN (EFAULT);
319         clnt->rc_prog = program;
320
321         clnt->rc_name = addr;
322         clnt->rc_sotype = sotype;
323         clnt->rc_soproto = soproto;
324         clnt->rc_auth = auth;
325         clnt->rc_rsize = max_read_size;
326         clnt->rc_wsize = max_write_size;
327         clnt->rc_flag = flags;
328
329         clnt->rc_proctlen = 0;
330         clnt->rc_proct = NULL;
331
332         RPC_RETURN (0);
333 }
334
335 /*
336  * Initialize sockets and congestion for a new RPC connection. We do not free
337  * the sockaddr if error.
338  */
339 int
340 rpcclnt_connect(rpc, td)
341         struct rpcclnt *rpc;
342         RPC_EXEC_CTX td;
343 {
344         struct socket  *so;
345         int             s, error, rcvreserve, sndreserve;
346         struct sockaddr *saddr;
347
348 #ifdef __OpenBSD__
349         struct sockaddr_in *sin;
350         struct mbuf    *m;
351 #else
352         struct sockaddr_in sin;
353
354         int             soarg;
355         struct sockopt  opt;
356 #endif
357
358         if (rpc == NULL) {
359                 RPCDEBUG("no rpcclnt struct!\n");
360                 RPC_RETURN(EFAULT);
361         }
362
363         /* create the socket */
364         rpc->rc_so = NULL;
365
366         saddr = rpc->rc_name;
367
368         error = socreate(saddr->sa_family, &rpc->rc_so, rpc->rc_sotype,
369                          rpc->rc_soproto, td->td_ucred, td);
370         if (error) {
371                 RPCDEBUG("error %d in socreate()", error);
372                 RPC_RETURN(error);
373         }
374         so = rpc->rc_so;
375         rpc->rc_soflags = so->so_proto->pr_flags;
376
377         /*
378          * Some servers require that the client port be a reserved port
379          * number. We always allocate a reserved port, as this prevents
380          * filehandle disclosure through UDP port capture.
381          */
382         if (saddr->sa_family == AF_INET) {
383 #ifdef __OpenBSD__
384                 struct mbuf    *mopt;
385                 int            *ip;
386 #endif
387
388 #ifdef __OpenBSD__
389                 MGET(mopt, M_TRYWAIT, MT_SOOPTS);
390                 mopt->m_len = sizeof(int);
391                 ip = mtod(mopt, int *);
392                 *ip = IP_PORTRANGE_LOW;
393
394                 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
395 #else
396                 soarg = IP_PORTRANGE_LOW;
397                 bzero(&opt, sizeof(struct sockopt));
398                 opt.sopt_dir = SOPT_SET;
399                 opt.sopt_level = IPPROTO_IP;
400                 opt.sopt_name = IP_PORTRANGE;
401                 opt.sopt_val = &soarg;
402                 opt.sopt_valsize = sizeof(soarg);
403
404                 error = sosetopt(so, &opt);
405 #endif
406                 if (error)
407                         goto bad;
408
409 #ifdef __OpenBSD__
410                 MGET(m, M_TRYWAIT, MT_SONAME);
411                 sin = mtod(m, struct sockaddr_in *);
412                 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
413                 sin->sin_family = AF_INET;
414                 sin->sin_addr.s_addr = INADDR_ANY;
415                 sin->sin_port = htons(0);
416                 error = sobind(so, m);
417                 m_freem(m);
418 #else
419                 sin.sin_len = sizeof(struct sockaddr_in);
420                 sin.sin_family = AF_INET;
421                 sin.sin_addr.s_addr = INADDR_ANY;
422                 sin.sin_port = htons(0);
423                 /*
424                  * &thread0 gives us root credentials to ensure sobind
425                  * will give us a reserved ephemeral port.
426                  */
427                 error = sobind(so, (struct sockaddr *) & sin, &thread0);
428 #endif
429                 if (error)
430                         goto bad;
431
432 #ifdef __OpenBSD__
433                 MGET(mopt, M_TRYWAIT, MT_SOOPTS);
434                 mopt->m_len = sizeof(int);
435                 ip = mtod(mopt, int *);
436                 *ip = IP_PORTRANGE_DEFAULT;
437                 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
438 #else
439                 soarg = IP_PORTRANGE_DEFAULT;
440                 bzero(&opt, sizeof(struct sockopt));
441                 opt.sopt_dir = SOPT_SET;
442                 opt.sopt_level = IPPROTO_IP;
443                 opt.sopt_name = IP_PORTRANGE;
444                 opt.sopt_val = &soarg;
445                 opt.sopt_valsize = sizeof(soarg);
446                 error = sosetopt(so, &opt);
447 #endif
448                 if (error)
449                         goto bad;
450         }
451         /*
452          * Protocols that do not require connections may be optionally left
453          * unconnected for servers that reply from a port other than
454          * NFS_PORT.
455          */
456         if (rpc->rc_flag & RPCCLNT_NOCONN) {
457                 if (rpc->rc_soflags & PR_CONNREQUIRED) {
458                         error = ENOTCONN;
459                         goto bad;
460                 }
461         } else {
462                 error = soconnect(so, saddr, td);
463                 if (error)
464                         goto bad;
465
466                 /*
467                  * Wait for the connection to complete. Cribbed from the
468                  * connect system call but with the wait timing out so that
469                  * interruptible mounts don't hang here for a long time.
470                  */
471 #ifdef __OpenBSD__
472                 s = splsoftnet();
473 #else
474                 s = splnet();
475 #endif
476                 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
477                         (void)tsleep((caddr_t) & so->so_timeo, PSOCK,
478                                      "rpc", 2 * hz);
479
480                         /*
481                          * XXX needs to catch interrupt signals. something
482                          * like this: if ((so->so_state & SS_ISCONNECTING) &&
483                          * so->so_error == 0 && rep && (error =
484                          * nfs_sigintr(nmp, rep, rep->r_td)) != 0) {
485                          * so->so_state &= ~SS_ISCONNECTING; splx(s); goto
486                          * bad; }
487                          */
488                 }
489                 if (so->so_error) {
490                         error = so->so_error;
491                         so->so_error = 0;
492                         splx(s);
493                         goto bad;
494                 }
495                 splx(s);
496         }
497         if (rpc->rc_flag & (RPCCLNT_SOFT | RPCCLNT_INT)) {
498                 so->so_rcv.sb_timeo = (5 * hz);
499                 so->so_snd.sb_timeo = (5 * hz);
500         } else {
501                 so->so_rcv.sb_timeo = 0;
502                 so->so_snd.sb_timeo = 0;
503         }
504
505
506         if (rpc->rc_sotype == SOCK_DGRAM) {
507                 sndreserve = rpc->rc_wsize + RPC_MAXPKTHDR;
508                 rcvreserve = rpc->rc_rsize + RPC_MAXPKTHDR;
509         } else if (rpc->rc_sotype == SOCK_SEQPACKET) {
510                 sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR) * 2;
511                 rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR) * 2;
512         } else {
513                 if (rpc->rc_sotype != SOCK_STREAM)
514                         panic("rpcclnt_connect() bad sotype");
515                 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
516 #ifdef __OpenBSD__
517                         MGET(m, M_TRYWAIT, MT_SOOPTS);
518                         *mtod(m, int32_t *) = 1;
519                         m->m_len = sizeof(int32_t);
520                         sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
521 #else
522                         soarg = 1;
523
524                         bzero(&opt, sizeof(struct sockopt));
525                         opt.sopt_dir = SOPT_SET;
526                         opt.sopt_level = SOL_SOCKET;
527                         opt.sopt_name = SO_KEEPALIVE;
528                         opt.sopt_val = &soarg;
529                         opt.sopt_valsize = sizeof(soarg);
530                         sosetopt(so, &opt);
531 #endif
532                 }
533                 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
534 #ifdef __OpenBSD__
535                         MGET(m, M_TRYWAIT, MT_SOOPTS);
536                         *mtod(m, int32_t *) = 1;
537                         m->m_len = sizeof(int32_t);
538                         sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
539 #else
540                         soarg = 1;
541
542                         bzero(&opt, sizeof(struct sockopt));
543                         opt.sopt_dir = SOPT_SET;
544                         opt.sopt_level = IPPROTO_TCP;
545                         opt.sopt_name = TCP_NODELAY;
546                         opt.sopt_val = &soarg;
547                         opt.sopt_valsize = sizeof(soarg);
548                         sosetopt(so, &opt);
549 #endif
550                 }
551                 sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR +
552                               sizeof(u_int32_t)) * 2;
553                 rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR +
554                               sizeof(u_int32_t)) * 2;
555         }
556         error = soreserve(so, sndreserve, rcvreserve);
557         if (error)
558                 goto bad;
559         so->so_rcv.sb_flags |= SB_NOINTR;
560         so->so_snd.sb_flags |= SB_NOINTR;
561
562         /* Initialize other non-zero congestion variables */
563         rpc->rc_srtt[0] = rpc->rc_srtt[1] = rpc->rc_srtt[2] =
564                  rpc->rc_srtt[3] = (RPC_TIMEO << 3);
565         rpc->rc_sdrtt[0] = rpc->rc_sdrtt[1] = rpc->rc_sdrtt[2] =
566                 rpc->rc_sdrtt[3] = 0;
567         rpc->rc_cwnd = RPC_MAXCWND / 2; /* Initial send window */
568         rpc->rc_sent = 0;
569         rpc->rc_timeouts = 0;
570         RPC_RETURN(0);
571
572 bad:
573         rpcclnt_disconnect(rpc);
574         RPC_RETURN(error);
575 }
576
577
578 /*
579  * Reconnect routine:
580  * Called when a connection is broken on a reliable protocol.
581  * - clean up the old socket
582  * - rpcclnt_connect() again
583  * - set R_MUSTRESEND for all outstanding requests on mount point
584  * If this fails the mount point is DEAD!
585  * nb: Must be called with the rpcclnt_sndlock() set on the mount point.
586  */
587 int
588 rpcclnt_reconnect(rep, td)
589         struct rpctask *rep;
590         RPC_EXEC_CTX td;
591 {
592         struct rpctask *rp;
593         struct rpcclnt *rpc = rep->r_rpcclnt;
594         int             error;
595
596         rpcclnt_disconnect(rpc);
597         while ((error = rpcclnt_connect(rpc, td)) != 0) {
598                 if (error == EINTR || error == ERESTART)
599                         RPC_RETURN(EINTR);
600                 tsleep(&lbolt, PSOCK, "rpccon", 0);
601         }
602
603         /*
604          * Loop through outstanding request list and fix up all requests on
605          * old socket.
606          */
607         for (rp = TAILQ_FIRST(&rpctask_q); rp != NULL;
608              rp = TAILQ_NEXT(rp, r_chain)) {
609                 if (rp->r_rpcclnt == rpc)
610                         rp->r_flags |= R_MUSTRESEND;
611         }
612         RPC_RETURN(0);
613 }
614
615 /*
616  * RPC transport disconnect. Clean up and unlink.
617  */
618 void
619 rpcclnt_disconnect(rpc)
620         struct rpcclnt *rpc;
621 {
622         struct socket  *so;
623
624         if (rpc->rc_so) {
625                 so = rpc->rc_so;
626                 rpc->rc_so = NULL;
627                 soshutdown(so, 2);
628                 soclose(so);
629         }
630 }
631
632 void
633 rpcclnt_safedisconnect(struct rpcclnt * rpc)
634 {
635         struct rpctask  dummytask;
636
637         bzero(&dummytask, sizeof(dummytask));
638         dummytask.r_rpcclnt = rpc;
639         rpcclnt_rcvlock(&dummytask);
640         rpcclnt_disconnect(rpc);
641         rpcclnt_rcvunlock(&rpc->rc_flag);
642 }
643
644 /*
645  * This is the rpc send routine. For connection based socket types, it
646  * must be called with an rpcclnt_sndlock() on the socket.
647  * "rep == NULL" indicates that it has been called from a server.
648  * For the client side:
649  * - return EINTR if the RPC is terminated, 0 otherwise
650  * - set R_MUSTRESEND if the send fails for any reason
651  * - do any cleanup required by recoverable socket errors (?)
652  * For the server side:
653  * - return EINTR or ERESTART if interrupted by a signal
654  * - return EPIPE if a connection is lost for connection based sockets (TCP...)
655  * - do any cleanup required by recoverable socket errors (?)
656  */
657 static int
658 rpcclnt_send(so, nam, top, rep)
659         struct socket  *so;
660 #ifdef __OpenBSD__
661         struct mbuf    *nam;
662 #else
663         struct sockaddr *nam;
664 #endif
665         struct mbuf    *top;
666         struct rpctask *rep;
667 {
668 #ifdef __OpenBSD__
669         struct mbuf    *sendnam;
670 #else
671         struct sockaddr *sendnam;
672         struct thread  *td = curthread;
673 #endif
674         int error, soflags, flags;
675
676         if (rep) {
677                 if (rep->r_flags & R_SOFTTERM) {
678                         m_freem(top);
679                         RPC_RETURN(EINTR);
680                 }
681                 if ((so = rep->r_rpcclnt->rc_so) == NULL) {
682                         rep->r_flags |= R_MUSTRESEND;
683                         m_freem(top);
684                         RPC_RETURN(0);
685                 }
686                 rep->r_flags &= ~R_MUSTRESEND;
687                 soflags = rep->r_rpcclnt->rc_soflags;
688         } else
689                 soflags = so->so_proto->pr_flags;
690
691         if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
692                 sendnam = NULL;
693         else
694                 sendnam = nam;
695
696         if (so->so_type == SOCK_SEQPACKET)
697                 flags = MSG_EOR;
698         else
699                 flags = 0;
700
701         /*
702          * XXXRW: If/when this code becomes MPSAFE itself, Giant might have
703          * to be conditionally acquired earlier for the stack so has to avoid
704          * lock order reversals with any locks held over rpcclnt_send().
705          */
706         error = sosend(so, sendnam, NULL, top, NULL, flags, td);
707         if (error) {
708                 if (rep) {
709                         log(LOG_INFO, "rpc send error %d for service %s\n", error,
710                             rep->r_rpcclnt->rc_prog->prog_name);
711                         /*
712                          * Deal with errors for the client side.
713                          */
714                         if (rep->r_flags & R_SOFTTERM)
715                                 error = EINTR;
716                         else
717                                 rep->r_flags |= R_MUSTRESEND;
718                 } else
719                         log(LOG_INFO, "rpc service send error %d\n", error);
720
721                 /*
722                  * Handle any recoverable (soft) socket errors here.
723                  */
724                 if (error != EINTR && error != ERESTART &&
725                     error != EWOULDBLOCK && error != EPIPE)
726                         error = 0;
727         }
728         RPC_RETURN(error);
729 }
730
731 /*
732  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all done by
733  * soreceive(), but for SOCK_STREAM we must deal with the Record Mark and
734  * consolidate the data into a new mbuf list. nb: Sometimes TCP passes the
735  * data up to soreceive() in long lists of small mbufs. For SOCK_STREAM we
736  * must be very careful to read an entire record once we have read any of it,
737  * even if the system call has been interrupted.
738  */
739 static int
740 rpcclnt_receive(rep, aname, mp, td)
741         struct rpctask *rep;
742 #ifdef __OpenBSD__
743         struct mbuf   **aname;
744 #else
745         struct sockaddr **aname;
746 #endif
747         struct mbuf   **mp;
748         RPC_EXEC_CTX  td;
749 {
750         struct socket  *so;
751         struct uio      auio;
752         struct iovec    aio;
753         struct mbuf    *m;
754         struct mbuf    *control;
755         u_int32_t       len;
756 #ifdef __OpenBSD__
757         struct mbuf   **getnam;
758 #else
759         struct sockaddr **getnam;
760 #endif
761         int error, sotype, rcvflg;
762
763         /*
764          * Set up arguments for soreceive()
765          */
766         *mp = NULL;
767         *aname = NULL;
768         sotype = rep->r_rpcclnt->rc_sotype;
769
770         /*
771          * For reliable protocols, lock against other senders/receivers in
772          * case a reconnect is necessary. For SOCK_STREAM, first get the
773          * Record Mark to find out how much more there is to get. We must
774          * lock the socket against other receivers until we have an entire
775          * rpc request/reply.
776          */
777         if (sotype != SOCK_DGRAM) {
778                 error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
779                 if (error)
780                         RPC_RETURN(error);
781 tryagain:
782                 /*
783                  * Check for fatal errors and resending request.
784                  */
785                 /*
786                  * Ugh: If a reconnect attempt just happened, rc_so would
787                  * have changed. NULL indicates a failed attempt that has
788                  * essentially shut down this mount point.
789                  */
790                 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
791                         rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
792                         RPC_RETURN(EINTR);
793                 }
794                 so = rep->r_rpcclnt->rc_so;
795                 if (!so) {
796                         error = rpcclnt_reconnect(rep, td);
797                         if (error) {
798                                 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
799                                 RPC_RETURN(error);
800                         }
801                         goto tryagain;
802                 }
803                 while (rep->r_flags & R_MUSTRESEND) {
804                         m = m_copym(rep->r_mreq, 0, M_COPYALL, M_TRYWAIT);
805                         rpcstats.rpcretries++;
806                         error = rpcclnt_send(so, rep->r_rpcclnt->rc_name, m, rep);
807                         if (error) {
808                                 if (error == EINTR || error == ERESTART ||
809                                     (error = rpcclnt_reconnect(rep, td)) != 0) {
810                                         rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
811                                         RPC_RETURN(error);
812                                 }
813                                 goto tryagain;
814                         }
815                 }
816                 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
817                 if (sotype == SOCK_STREAM) {
818                         aio.iov_base = (caddr_t) & len;
819                         aio.iov_len = sizeof(u_int32_t);
820                         auio.uio_iov = &aio;
821                         auio.uio_iovcnt = 1;
822                         auio.uio_segflg = UIO_SYSSPACE;
823                         auio.uio_rw = UIO_READ;
824                         auio.uio_offset = 0;
825                         auio.uio_resid = sizeof(u_int32_t);
826 #ifdef __OpenBSD__
827                         auio.uio_procp = td;
828 #else
829                         auio.uio_td = td;
830 #endif
831                         do {
832                                 rcvflg = MSG_WAITALL;
833                                 error = soreceive(so, NULL, &auio, NULL, NULL, &rcvflg);
834                                 if (error == EWOULDBLOCK && rep) {
835                                         if (rep->r_flags & R_SOFTTERM)
836                                                 RPC_RETURN(EINTR);
837                                 }
838                         } while (error == EWOULDBLOCK);
839                         if (!error && auio.uio_resid > 0) {
840                                 log(LOG_INFO,
841                                 "short receive (%zu/%zu) from rpc server %s\n",
842                                     sizeof(u_int32_t) - auio.uio_resid,
843                                     sizeof(u_int32_t),
844                                     rep->r_rpcclnt->rc_prog->prog_name);
845                                 error = EPIPE;
846                         }
847                         if (error)
848                                 goto errout;
849                         len = ntohl(len) & ~0x80000000;
850                         /*
851                          * This is SERIOUS! We are out of sync with the
852                          * sender and forcing a disconnect/reconnect is all I
853                          * can do.
854                          */
855                         if (len > RPC_MAXPACKET) {
856                                 log(LOG_ERR, "%s (%d) from rpc server %s\n",
857                                     "impossible packet length",
858                                     len,
859                                     rep->r_rpcclnt->rc_prog->prog_name);
860                                 error = EFBIG;
861                                 goto errout;
862                         }
863                         auio.uio_resid = len;
864                         do {
865                                 rcvflg = MSG_WAITALL;
866                                 error = soreceive(so, NULL, &auio, mp, NULL, &rcvflg);
867                         } while (error == EWOULDBLOCK || error == EINTR ||
868                                  error == ERESTART);
869                         if (!error && auio.uio_resid > 0) {
870                                 log(LOG_INFO,
871                                 "short receive (%d/%d) from rpc server %s\n",
872                                     len - auio.uio_resid, len,
873                                     rep->r_rpcclnt->rc_prog->prog_name);
874                                 error = EPIPE;
875                         }
876                 } else {
877                         /*
878                          * NB: Since uio_resid is big, MSG_WAITALL is ignored
879                          * and soreceive() will return when it has either a
880                          * control msg or a data msg. We have no use for
881                          * control msg., but must grab them and then throw
882                          * them away so we know what is going on.
883                          */
884                         auio.uio_resid = len = 100000000;       /* Anything Big */
885 #ifdef __OpenBSD__
886                         auio.uio_procp = td;
887 #else
888                         auio.uio_td = td;
889 #endif
890                         do {
891                                 rcvflg = 0;
892                                 error = soreceive(so, NULL, &auio, mp, &control, &rcvflg);
893                                 if (control)
894                                         m_freem(control);
895                                 if (error == EWOULDBLOCK && rep) {
896                                         if (rep->r_flags & R_SOFTTERM)
897                                                 RPC_RETURN(EINTR);
898                                 }
899                         } while (error == EWOULDBLOCK ||
900                                  (!error && *mp == NULL && control));
901                         if ((rcvflg & MSG_EOR) == 0)
902                                 printf("Egad!!\n");
903                         if (!error && *mp == NULL)
904                                 error = EPIPE;
905                         len -= auio.uio_resid;
906                 }
907 errout:
908                 if (error && error != EINTR && error != ERESTART) {
909                         m_freem(*mp);
910                         *mp = (struct mbuf *) 0;
911                         if (error != EPIPE)
912                                 log(LOG_INFO,
913                                     "receive error %d from rpc server %s\n",
914                                     error,
915                                     rep->r_rpcclnt->rc_prog->prog_name);
916                         error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
917                         if (!error)
918                                 error = rpcclnt_reconnect(rep, td);
919                         if (!error)
920                                 goto tryagain;
921                 }
922         } else {
923                 if ((so = rep->r_rpcclnt->rc_so) == NULL)
924                         RPC_RETURN(EACCES);
925                 if (so->so_state & SS_ISCONNECTED)
926                         getnam = NULL;
927                 else
928                         getnam = aname;
929                 auio.uio_resid = len = 1000000;
930 #ifdef __OpenBSD__
931                 auio.uio_procp = td;
932 #else
933                 auio.uio_td = td;
934 #endif
935
936                 do {
937                         rcvflg = 0;
938                         error = soreceive(so, getnam, &auio, mp, NULL, &rcvflg);
939                         RPCDEBUG("soreceive returns %d", error);
940                         if (error == EWOULDBLOCK && (rep->r_flags & R_SOFTTERM)) {
941                                 RPCDEBUG("wouldblock && softerm -> EINTR");
942                                 RPC_RETURN(EINTR);
943                         }
944                 } while (error == EWOULDBLOCK);
945                 len -= auio.uio_resid;
946         }
947         if (error) {
948                 m_freem(*mp);
949                 *mp = NULL;
950         } else {
951                 /*
952                  * Search for any mbufs that are not a multiple of 4 bytes
953                  * long or with m_data not longword aligned. These could
954                  * cause pointer alignment problems, so copy them to well
955                  * aligned mbufs.
956                  */
957                 rpcclnt_realign(mp, 5 * RPCX_UNSIGNED);
958         }
959         RPC_RETURN(error);
960 }
961
962
963 /*
964  * Implement receipt of reply on a socket. We must search through the list of
965  * received datagrams matching them with outstanding requests using the xid,
966  * until ours is found.
967  */
968 /* ARGSUSED */
969 static int
970 rpcclnt_reply(myrep, td)
971         struct rpctask *myrep;
972         RPC_EXEC_CTX td;
973 {
974         struct rpctask *rep;
975         struct rpcclnt *rpc = myrep->r_rpcclnt;
976         int32_t         t1;
977         struct mbuf    *mrep, *md;
978 #ifdef __OpenBSD__
979         struct mbuf    *nam;
980 #else
981         struct sockaddr *nam;
982 #endif
983         u_int32_t       rxid, *tl;
984         caddr_t         dpos, cp2;
985         int             error;
986
987         /*
988          * Loop around until we get our own reply
989          */
990         for (;;) {
991                 /*
992                  * Lock against other receivers so that I don't get stuck in
993                  * sbwait() after someone else has received my reply for me.
994                  * Also necessary for connection based protocols to avoid
995                  * race conditions during a reconnect.
996                  */
997                 error = rpcclnt_rcvlock(myrep);
998                 if (error)
999                         RPC_RETURN(error);
1000                 /* Already received, bye bye */
1001                 if (myrep->r_mrep != NULL) {
1002                         rpcclnt_rcvunlock(&rpc->rc_flag);
1003                         RPC_RETURN(0);
1004                 }
1005                 /*
1006                  * Get the next Rpc reply off the socket
1007                  */
1008                 error = rpcclnt_receive(myrep, &nam, &mrep, td);
1009
1010                 rpcclnt_rcvunlock(&rpc->rc_flag);
1011
1012                 if (error) {
1013                         /*
1014                          * Ignore routing errors on connectionless
1015                          * protocols??
1016                          */
1017                         if (RPCIGNORE_SOERROR(rpc->rc_soflags, error)) {
1018                                 rpc->rc_so->so_error = 0;
1019                                 if (myrep->r_flags & R_GETONEREP)
1020                                         RPC_RETURN(0);
1021                                 RPCDEBUG("ingoring routing error on connectionless protocol.");
1022                                 continue;
1023                         }
1024                         RPC_RETURN(error);
1025                 }
1026 #ifdef __OpenBSD__
1027                 if (nam)
1028                         m_freem(nam);
1029 #else
1030                 if (nam)
1031                         FREE(nam, M_SONAME);
1032 #endif
1033
1034                 /*
1035                  * Get the xid and check that it is an rpc reply
1036                  */
1037                 md = mrep;
1038                 dpos = mtod(md, caddr_t);
1039                 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1040                 rxid = *tl++;
1041                 if (*tl != rpc_reply) {
1042                         rpcstats.rpcinvalid++;
1043                         m_freem(mrep);
1044 rpcmout:
1045                         if (myrep->r_flags & R_GETONEREP)
1046                                 RPC_RETURN(0);
1047                         continue;
1048                 }
1049                 /*
1050                  * Loop through the request list to match up the reply Iff no
1051                  * match, just drop the datagram
1052                  */
1053                 TAILQ_FOREACH(rep, &rpctask_q, r_chain) {
1054                         if (rep->r_mrep == NULL && rxid == rep->r_xid) {
1055                                 /* Found it.. */
1056                                 rep->r_mrep = mrep;
1057                                 rep->r_md = md;
1058                                 rep->r_dpos = dpos;
1059
1060                                 /*
1061                                  * Update congestion window. Do the additive
1062                                  * increase of one rpc/rtt.
1063                                  */
1064                                 if (rpc->rc_cwnd <= rpc->rc_sent) {
1065                                         rpc->rc_cwnd +=
1066                                                 (RPC_CWNDSCALE * RPC_CWNDSCALE +
1067                                         (rpc->rc_cwnd >> 1)) / rpc->rc_cwnd;
1068                                         if (rpc->rc_cwnd > RPC_MAXCWND)
1069                                                 rpc->rc_cwnd = RPC_MAXCWND;
1070                                 }
1071                                 rep->r_flags &= ~R_SENT;
1072                                 rpc->rc_sent -= RPC_CWNDSCALE;
1073                                 /*
1074                                  * Update rtt using a gain of 0.125 on the
1075                                  * mean and a gain of 0.25 on the deviation.
1076                                  */
1077                                 if (rep->r_flags & R_TIMING) {
1078                                         /*
1079                                          * Since the timer resolution of
1080                                          * NFS_HZ is so course, it can often
1081                                          * result in r_rtt == 0. Since r_rtt
1082                                          * == N means that the actual rtt is
1083                                          * between N+dt and N+2-dt ticks, add
1084                                          * 1.
1085                                          */
1086                                         t1 = rep->r_rtt + 1;
1087                                         t1 -= (RPC_SRTT(rpc, rep) >> 3);
1088                                         RPC_SRTT(rpc, rep) += t1;
1089                                         if (t1 < 0)
1090                                                 t1 = -t1;
1091                                         t1 -= (RPC_SDRTT(rpc, rep) >> 2);
1092                                         RPC_SDRTT(rpc, rep) += t1;
1093                                 }
1094                                 rpc->rc_timeouts = 0;
1095                                 break;
1096                         }
1097                 }
1098                 /*
1099                  * If not matched to a request, drop it. If it's mine, get
1100                  * out.
1101                  */
1102                 if (rep == 0) {
1103                         rpcstats.rpcunexpected++;
1104                         RPCDEBUG("rpc reply not matched\n");
1105                         m_freem(mrep);
1106                 } else if (rep == myrep) {
1107                         if (rep->r_mrep == NULL)
1108                                 panic("rpcreply nil");
1109                         RPC_RETURN(0);
1110                 }
1111                 if (myrep->r_flags & R_GETONEREP)
1112                         RPC_RETURN(0);
1113         }
1114 }
1115
1116 /* XXX: ignores tryagain! */
1117 /*
1118  * code from nfs_request - goes something like this
1119  *      - fill in task struct
1120  *      - links task into list
1121  *      - calls rpcclnt_send() for first transmit
1122  *      - calls rpcclnt_reply() to get reply
1123  *      - fills in reply (which should be initialized prior to
1124  *        calling), which is valid when 0 is returned and is
1125  *        NEVER freed in this function
1126  * 
1127  * nb: always frees the request header, but NEVER frees 'mrest'
1128  * 
1129  * rpcclnt_setauth() should be used before calling this. EAUTH is returned if
1130  * authentication fails.
1131  *
1132  * note that reply->result_* are invalid unless reply->type ==
1133  * RPC_MSGACCEPTED and reply->status == RPC_SUCCESS and that reply->verf_*
1134  * are invalid unless reply->type == RPC_MSGACCEPTED
1135  */
1136 int
1137 rpcclnt_request(rpc, mrest, procnum, td, cred, reply)
1138         struct rpcclnt *rpc;
1139         struct mbuf    *mrest;
1140         int             procnum;
1141         RPC_EXEC_CTX    td;
1142         struct ucred   *cred;
1143         struct rpc_reply *reply;
1144 {
1145         struct mbuf    *m, *mrep;
1146         struct rpctask *task;
1147         u_int32_t      *tl;
1148         struct mbuf    *md, *mheadend;
1149         caddr_t         dpos, cp2;
1150         int             t1, s, error = 0, mrest_len;
1151         u_int32_t       xid;
1152
1153 #ifdef __OpenBSD__
1154         task = pool_get(&rpctask_pool, PR_WAITOK);
1155 #else
1156         MALLOC(task, struct rpctask *, sizeof(struct rpctask), M_RPC, (M_WAITOK | M_ZERO));
1157 #endif
1158
1159         task->r_rpcclnt = rpc;
1160         task->r_procnum = procnum;
1161         task->r_td = td;
1162
1163         mrest_len = m_length(mrest, NULL);
1164
1165         m = rpcclnt_buildheader(rpc, procnum, mrest, mrest_len, &xid, &mheadend,
1166             cred);
1167         /*
1168          * This can happen if the auth_type is neither UNIX or NULL
1169          */
1170         if (m == NULL) {
1171 #ifdef __OpenBSD__
1172                 pool_put(&rpctask_pool, task);
1173 #else
1174                 FREE(task, M_RPC);
1175 #endif
1176                 error = EPROTONOSUPPORT;
1177                 goto rpcmout;
1178         }
1179
1180         /*
1181          * For stream protocols, insert a Sun RPC Record Mark.
1182          */
1183         if (rpc->rc_sotype == SOCK_STREAM) {
1184                 M_PREPEND(m, RPCX_UNSIGNED, M_TRYWAIT);
1185                 *mtod(m, u_int32_t *) = htonl(0x80000000 |
1186                                          (m->m_pkthdr.len - RPCX_UNSIGNED));
1187         }
1188         task->r_mreq = m;
1189         task->r_xid = xid;
1190
1191         if (rpc->rc_flag & RPCCLNT_SOFT)
1192                 task->r_retry = rpc->rc_retry;
1193         else
1194                 task->r_retry = RPC_MAXREXMIT + 1;      /* past clip limit */
1195         task->r_rtt = task->r_rexmit = 0;
1196
1197         if (rpcclnt_proct(rpc, procnum) > 0)
1198                 task->r_flags = R_TIMING;
1199         else
1200                 task->r_flags = 0;
1201         task->r_mrep = NULL;
1202
1203         /*
1204          * Do the client side RPC.
1205          */
1206         rpcstats.rpcrequests++;
1207
1208         /*
1209          * Chain request into list of outstanding requests. Be sure to put it
1210          * LAST so timer finds oldest requests first.
1211          */
1212         s = splsoftclock();
1213         if (TAILQ_EMPTY(&rpctask_q))
1214                 callout_reset(&rpcclnt_callout, rpcclnt_ticks, rpcclnt_timer,
1215                     NULL);
1216         TAILQ_INSERT_TAIL(&rpctask_q, task, r_chain);
1217
1218         /*
1219          * If backing off another request or avoiding congestion, don't send
1220          * this one now but let timer do it. If not timing a request, do it
1221          * now.
1222          */
1223         if (rpc->rc_so && (rpc->rc_sotype != SOCK_DGRAM ||
1224                            (rpc->rc_flag & RPCCLNT_DUMBTIMR) ||
1225                            rpc->rc_sent < rpc->rc_cwnd)) {
1226                 splx(s);
1227
1228                 if (rpc->rc_soflags & PR_CONNREQUIRED)
1229                         error = rpcclnt_sndlock(&rpc->rc_flag, task);
1230                 if (!error) {
1231                         error = rpcclnt_send(rpc->rc_so, rpc->rc_name,
1232                                              m_copym(m, 0, M_COPYALL, M_TRYWAIT),
1233                                              task);
1234                         if (rpc->rc_soflags & PR_CONNREQUIRED)
1235                                 rpcclnt_sndunlock(&rpc->rc_flag);
1236                 }
1237                 if (!error && (task->r_flags & R_MUSTRESEND) == 0) {
1238                         rpc->rc_sent += RPC_CWNDSCALE;
1239                         task->r_flags |= R_SENT;
1240                 }
1241         } else {
1242                 splx(s);
1243                 task->r_rtt = -1;
1244         }
1245
1246         /*
1247          * Wait for the reply from our send or the timer's.
1248          */
1249         if (!error || error == EPIPE)
1250                 error = rpcclnt_reply(task, td);
1251
1252         /*
1253          * RPC done, unlink the request.
1254          */
1255         s = splsoftclock();
1256         TAILQ_REMOVE(&rpctask_q, task, r_chain);
1257         if (TAILQ_EMPTY(&rpctask_q))
1258                 callout_stop(&rpcclnt_callout);
1259         splx(s);
1260
1261         /*
1262          * Decrement the outstanding request count.
1263          */
1264         if (task->r_flags & R_SENT) {
1265                 task->r_flags &= ~R_SENT;       /* paranoia */
1266                 rpc->rc_sent -= RPC_CWNDSCALE;
1267         }
1268         /*
1269          * If there was a successful reply and a tprintf msg. tprintf a
1270          * response.
1271          */
1272         if (!error && (task->r_flags & R_TPRINTFMSG)) {
1273                 mtx_lock(&Giant);
1274                 rpcclnt_msg(task->r_td, rpc->rc_prog->prog_name,
1275                             "is alive again");
1276                 mtx_unlock(&Giant);
1277         }
1278
1279         /* free request header (leaving mrest) */
1280         mheadend->m_next = NULL;
1281         m_freem(task->r_mreq);
1282
1283         /* initialize reply */
1284         reply->mrep = task->r_mrep;
1285         reply->verf_md = NULL;
1286         reply->result_md = NULL;
1287
1288         mrep = task->r_mrep;
1289         md = task->r_md;
1290         dpos = task->r_dpos;
1291
1292         /* task structure is no longer needed */
1293 #ifdef __OpenBSD__
1294         pool_put(&rpctask_pool, task);
1295 #else
1296         FREE(task, M_RPC);
1297 #endif
1298
1299         if (error)
1300                 goto rpcmout;
1301
1302         /*
1303          * break down the rpc header and check if ok
1304          */
1305
1306         rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1307         reply->stat.type = fxdr_unsigned(u_int32_t, *tl);
1308
1309         if (reply->stat.type == RPC_MSGDENIED) {
1310                 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1311                 reply->stat.status = fxdr_unsigned(u_int32_t, *tl);
1312
1313                 switch (reply->stat.status) {
1314                 case RPC_MISMATCH:
1315                         rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1316                         reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++);
1317                         reply->stat.mismatch_info.high = fxdr_unsigned(u_int32_t, *tl);
1318                         error = EOPNOTSUPP;
1319                         break;
1320                 case RPC_AUTHERR:
1321                         rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1322                         reply->stat.autherr = fxdr_unsigned(u_int32_t, *tl);
1323                         error = EACCES;
1324                         break;
1325                 default:
1326                         error = EBADRPC;
1327                         break;
1328                 }
1329                 goto rpcmout;
1330         } else if (reply->stat.type != RPC_MSGACCEPTED) {
1331                 error = EBADRPC;
1332                 goto rpcmout;
1333         }
1334
1335         rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1336
1337         reply->verf_md = md;
1338         reply->verf_dpos = dpos;
1339
1340         reply->verf_type = fxdr_unsigned(u_int32_t, *tl++);
1341         reply->verf_size = fxdr_unsigned(u_int32_t, *tl);
1342
1343         if (reply->verf_size != 0)
1344                 rpcm_adv(rpcm_rndup(reply->verf_size));
1345
1346         rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1347         reply->stat.status = fxdr_unsigned(u_int32_t, *tl);
1348
1349         if (reply->stat.status == RPC_SUCCESS) {
1350                 if ((uint32_t)(dpos - mtod(md, caddr_t)) >= md->m_len) {
1351                         RPCDEBUG("where is the next mbuf?");
1352                         RPCDEBUG("%d -> %d",
1353                             (int)(dpos - mtod(md, caddr_t)), md->m_len);
1354                         if (md->m_next == NULL) {
1355                                 error = EBADRPC;
1356                                 goto rpcmout;
1357                         } else {
1358                                 reply->result_md = md->m_next;
1359                                 reply->result_dpos = mtod(reply->result_md,
1360                                     caddr_t);
1361                         }
1362                 } else {
1363                         reply->result_md = md;
1364                         reply->result_dpos = dpos;
1365                 }
1366         } else if (reply->stat.status == RPC_PROGMISMATCH) {
1367                 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1368                 reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++);
1369                 reply->stat.mismatch_info.high = fxdr_unsigned(u_int32_t, *tl);
1370                 error = EOPNOTSUPP;
1371                 goto rpcmout;
1372         } else {
1373                 error = EPROTONOSUPPORT;
1374                 goto rpcmout;
1375         }
1376         error = 0;
1377
1378 rpcmout:
1379         RPC_RETURN(error);
1380 }
1381
1382
1383 /*
1384  * RPC timer routine
1385  * Scan the rpctask list and retranmit any requests that have timed out.
1386  * To avoid retransmission attempts on STREAM sockets (in the future) make
1387  * sure to set the r_retry field to 0 (implies nm_retry == 0).
1388  */
1389 void
1390 rpcclnt_timer(arg)
1391         void           *arg;
1392 {
1393 #ifdef __OpenBSD__
1394         struct timeout *to = (struct timeout *) arg;
1395 #endif
1396         struct rpctask *rep;
1397         struct mbuf    *m;
1398         struct socket  *so;
1399         struct rpcclnt *rpc;
1400         int             timeo;
1401         int             s, error;
1402
1403 #ifndef __OpenBSD__
1404         struct thread  *td = curthread;
1405 #endif
1406
1407 #ifdef __OpenBSD__
1408         s = splsoftnet();
1409 #else
1410         s = splnet();
1411 #endif
1412         mtx_lock(&Giant);       /* rpc_msg -> tprintf */
1413         TAILQ_FOREACH(rep, &rpctask_q, r_chain) {
1414                 rpc = rep->r_rpcclnt;
1415                 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
1416                         continue;
1417                 if (rpcclnt_sigintr(rpc, rep, rep->r_td)) {
1418                         rep->r_flags |= R_SOFTTERM;
1419                         continue;
1420                 }
1421                 if (rep->r_rtt >= 0) {
1422                         rep->r_rtt++;
1423                         if (rpc->rc_flag & RPCCLNT_DUMBTIMR)
1424                                 timeo = rpc->rc_timeo;
1425                         else
1426                                 timeo = RPC_RTO(rpc, rpcclnt_proct(rep->r_rpcclnt,
1427                                                            rep->r_procnum));
1428                         if (rpc->rc_timeouts > 0)
1429                                 timeo *= rpcclnt_backoff[rpc->rc_timeouts - 1];
1430                         if (rep->r_rtt <= timeo)
1431                                 continue;
1432                         if (rpc->rc_timeouts < 8)
1433                                 rpc->rc_timeouts++;
1434                 }
1435                 /*
1436                  * Check for server not responding
1437                  */
1438                 if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
1439                     rep->r_rexmit > rpc->rc_deadthresh) {
1440                         rpcclnt_msg(rep->r_td, rpc->rc_prog->prog_name,
1441                                     "not responding");
1442                         rep->r_flags |= R_TPRINTFMSG;
1443                 }
1444                 if (rep->r_rexmit >= rep->r_retry) {    /* too many */
1445                         rpcstats.rpctimeouts++;
1446                         rep->r_flags |= R_SOFTTERM;
1447                         continue;
1448                 }
1449                 if (rpc->rc_sotype != SOCK_DGRAM) {
1450                         if (++rep->r_rexmit > RPC_MAXREXMIT)
1451                                 rep->r_rexmit = RPC_MAXREXMIT;
1452                         continue;
1453                 }
1454                 if ((so = rpc->rc_so) == NULL)
1455                         continue;
1456
1457                 /*
1458                  * If there is enough space and the window allows.. Resend it
1459                  * Set r_rtt to -1 in case we fail to send it now.
1460                  */
1461                 rep->r_rtt = -1;
1462                 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
1463                     ((rpc->rc_flag & RPCCLNT_DUMBTIMR) ||
1464                      (rep->r_flags & R_SENT) ||
1465                      rpc->rc_sent < rpc->rc_cwnd) &&
1466                     (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) {
1467                         if ((rpc->rc_flag & RPCCLNT_NOCONN) == 0)
1468                                 error = (*so->so_proto->pr_usrreqs->pru_send) (so, 0, m,
1469                                                             NULL, NULL, td);
1470                         else
1471                                 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, rpc->rc_name, NULL, td);
1472                         if (error) {
1473                                 if (RPCIGNORE_SOERROR(rpc->rc_soflags, error))
1474                                         so->so_error = 0;
1475                         } else {
1476                                 /*
1477                                  * Iff first send, start timing else turn
1478                                  * timing off, backoff timer and divide
1479                                  * congestion window by 2.
1480                                  */
1481                                 if (rep->r_flags & R_SENT) {
1482                                         rep->r_flags &= ~R_TIMING;
1483                                         if (++rep->r_rexmit > RPC_MAXREXMIT)
1484                                                 rep->r_rexmit = RPC_MAXREXMIT;
1485                                         rpc->rc_cwnd >>= 1;
1486                                         if (rpc->rc_cwnd < RPC_CWNDSCALE)
1487                                                 rpc->rc_cwnd = RPC_CWNDSCALE;
1488                                         rpcstats.rpcretries++;
1489                                 } else {
1490                                         rep->r_flags |= R_SENT;
1491                                         rpc->rc_sent += RPC_CWNDSCALE;
1492                                 }
1493                                 rep->r_rtt = 0;
1494                         }
1495                 }
1496         }
1497         mtx_unlock(&Giant);     /* rpc_msg -> tprintf */
1498         splx(s);
1499
1500 #ifdef __OpenBSD__
1501         timeout_add(rpcclnt_timer, to, rpcclnt_ticks);
1502 #else
1503         callout_reset(&rpcclnt_callout, rpcclnt_ticks, rpcclnt_timer, NULL);
1504 #endif
1505 }
1506
1507 /*
1508  * Test for a termination condition pending on the process. This is used for
1509  * RPCCLNT_INT mounts.
1510  */
1511 int
1512 rpcclnt_sigintr(rpc, task, pr)
1513         struct rpcclnt *rpc;
1514         struct rpctask *task;
1515         RPC_EXEC_CTX pr;
1516 {
1517         struct proc    *p;
1518
1519         sigset_t        tmpset;
1520
1521         if (rpc == NULL) 
1522                 return EFAULT;
1523
1524         /* XXX deal with forced unmounts */
1525
1526         if (task && (task->r_flags & R_SOFTTERM))
1527                 RPC_RETURN(EINTR);
1528
1529         if (!(rpc->rc_flag & RPCCLNT_INT))
1530                 RPC_RETURN(0);
1531
1532         if (pr == NULL)
1533                 return (0);
1534
1535 #ifdef __OpenBSD__
1536         p = pr;
1537         if (p && p->p_siglist &&
1538             (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
1539              RPCINT_SIGMASK))
1540                 RPC_RETURN(EINTR);
1541 #else
1542         p = pr->td_proc;
1543         PROC_LOCK(p);
1544         tmpset = p->p_siglist;
1545         SIGSETNAND(tmpset, pr->td_sigmask);
1546         mtx_lock(&p->p_sigacts->ps_mtx);
1547         SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
1548         mtx_unlock(&p->p_sigacts->ps_mtx);
1549         if (SIGNOTEMPTY(p->p_siglist) && RPCCLNTINT_SIGMASK(tmpset)) {
1550                 PROC_UNLOCK(p);
1551                 RPC_RETURN(EINTR);
1552         }
1553         PROC_UNLOCK(p);
1554 #endif
1555         RPC_RETURN(0);
1556 }
1557
1558 /*
1559  * Lock a socket against others. Necessary for STREAM sockets to ensure you
1560  * get an entire rpc request/reply and also to avoid race conditions between
1561  * the processes with nfs requests in progress when a reconnect is necessary.
1562  */
1563 static int
1564 rpcclnt_sndlock(flagp, task)
1565         int            *flagp;
1566         struct rpctask *task;
1567 {
1568         RPC_EXEC_CTX p;
1569         int             slpflag = 0, slptimeo = 0;
1570
1571         p = task->r_td;
1572         if (task->r_rpcclnt->rc_flag & RPCCLNT_INT)
1573                 slpflag = PCATCH;
1574         while (*flagp & RPCCLNT_SNDLOCK) {
1575                 if (rpcclnt_sigintr(task->r_rpcclnt, task, p))
1576                         RPC_RETURN(EINTR);
1577                 *flagp |= RPCCLNT_WANTSND;
1578                 (void)tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcsndlck",
1579                              slptimeo);
1580                 if (slpflag == PCATCH) {
1581                         slpflag = 0;
1582                         slptimeo = 2 * hz;
1583                 }
1584         }
1585         *flagp |= RPCCLNT_SNDLOCK;
1586         RPC_RETURN(0);
1587 }
1588
1589 /*
1590  * Unlock the stream socket for others.
1591  */
1592 static void
1593 rpcclnt_sndunlock(flagp)
1594         int            *flagp;
1595 {
1596
1597         if ((*flagp & RPCCLNT_SNDLOCK) == 0)
1598                 panic("rpc sndunlock");
1599         *flagp &= ~RPCCLNT_SNDLOCK;
1600         if (*flagp & RPCCLNT_WANTSND) {
1601                 *flagp &= ~RPCCLNT_WANTSND;
1602                 wakeup((caddr_t) flagp);
1603         }
1604 }
1605
1606 static int
1607 rpcclnt_rcvlock(task)
1608         struct rpctask *task;
1609 {
1610         int            *flagp = &task->r_rpcclnt->rc_flag;
1611         int             slpflag, slptimeo = 0;
1612
1613         if (*flagp & RPCCLNT_INT)
1614                 slpflag = PCATCH;
1615         else
1616                 slpflag = 0;
1617         while (*flagp & RPCCLNT_RCVLOCK) {
1618                 if (rpcclnt_sigintr(task->r_rpcclnt, task, task->r_td))
1619                         RPC_RETURN(EINTR);
1620                 *flagp |= RPCCLNT_WANTRCV;
1621                 (void)tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcrcvlk",
1622                              slptimeo);
1623                 if (slpflag == PCATCH) {
1624                         slpflag = 0;
1625                         slptimeo = 2 * hz;
1626                 }
1627         }
1628         *flagp |= RPCCLNT_RCVLOCK;
1629         RPC_RETURN(0);
1630 }
1631
1632 /*
1633  * Unlock the stream socket for others.
1634  */
1635 static void
1636 rpcclnt_rcvunlock(flagp)
1637         int            *flagp;
1638 {
1639
1640         if ((*flagp & RPCCLNT_RCVLOCK) == 0)
1641                 panic("nfs rcvunlock");
1642         *flagp &= ~RPCCLNT_RCVLOCK;
1643         if (*flagp & RPCCLNT_WANTRCV) {
1644                 *flagp &= ~RPCCLNT_WANTRCV;
1645                 wakeup((caddr_t) flagp);
1646         }
1647 }
1648
1649 #if 0
1650 /*
1651  * Check for badly aligned mbuf data areas and realign data in an mbuf list
1652  * by copying the data areas up, as required.
1653  */
1654 void
1655 rpcclnt_realign(m, hsiz)
1656         struct mbuf    *m;
1657         int             hsiz;
1658 {
1659         struct mbuf    *m2;
1660         int             siz, mlen, olen;
1661         caddr_t         tcp, fcp;
1662         struct mbuf    *mnew;
1663
1664         while (m) {
1665                 /*
1666                  * This never happens for UDP, rarely happens for TCP but
1667                  * frequently happens for iso transport.
1668                  */
1669                 if ((m->m_len & 0x3) || (mtod(m, long)&0x3)) {
1670                         olen = m->m_len;
1671                         fcp = mtod(m, caddr_t);
1672                         if ((long)fcp & 0x3) {
1673                                 if (m->m_flags & M_PKTHDR)
1674                                         m_tag_delete_chain(m, NULL);
1675                                 m->m_flags &= ~M_PKTHDR;
1676                                 if (m->m_flags & M_EXT)
1677                                         m->m_data = m->m_ext.ext_buf +
1678                                                 ((m->m_ext.ext_size - olen) & ~0x3);
1679                                 else
1680                                         m->m_data = m->m_dat;
1681                         }
1682                         m->m_len = 0;
1683                         tcp = mtod(m, caddr_t);
1684                         mnew = m;
1685                         m2 = m->m_next;
1686
1687                         /*
1688                          * If possible, only put the first invariant part of
1689                          * the RPC header in the first mbuf.
1690                          */
1691                         mlen = M_TRAILINGSPACE(m);
1692                         if (olen <= hsiz && mlen > hsiz)
1693                                 mlen = hsiz;
1694
1695                         /* Loop through the mbuf list consolidating data. */
1696                         while (m) {
1697                                 while (olen > 0) {
1698                                         if (mlen == 0) {
1699                                                 if (m2->m_flags & M_PKTHDR)
1700                                                         m_tag_delete_chain(m2, NULL);
1701                                                 m2->m_flags &= ~M_PKTHDR;
1702                                                 if (m2->m_flags & M_EXT)
1703                                                         m2->m_data = m2->m_ext.ext_buf;
1704                                                 else
1705                                                         m2->m_data = m2->m_dat;
1706                                                 m2->m_len = 0;
1707                                                 mlen = M_TRAILINGSPACE(m2);
1708                                                 tcp = mtod(m2, caddr_t);
1709                                                 mnew = m2;
1710                                                 m2 = m2->m_next;
1711                                         }
1712                                         siz = min(mlen, olen);
1713                                         if (tcp != fcp)
1714                                                 bcopy(fcp, tcp, siz);
1715                                         mnew->m_len += siz;
1716                                         mlen -= siz;
1717                                         olen -= siz;
1718                                         tcp += siz;
1719                                         fcp += siz;
1720                                 }
1721                                 m = m->m_next;
1722                                 if (m) {
1723                                         olen = m->m_len;
1724                                         fcp = mtod(m, caddr_t);
1725                                 }
1726                         }
1727
1728                         /*
1729                          * Finally, set m_len == 0 for any trailing mbufs
1730                          * that have been copied out of.
1731                          */
1732                         while (m2) {
1733                                 m2->m_len = 0;
1734                                 m2 = m2->m_next;
1735                         }
1736                         return;
1737                 }
1738                 m = m->m_next;
1739         }
1740 }
1741 #else
1742 static void
1743 rpcclnt_realign(struct mbuf **pm, int hsiz)
1744 {
1745         struct mbuf *m;
1746         struct mbuf *n = NULL;
1747         int off = 0;
1748
1749         RPCDEBUG("in rpcclnt_realign()");
1750
1751         while ((m = *pm) != NULL) {
1752             if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
1753                 MGET(n, M_TRYWAIT, MT_DATA);
1754                 if (m->m_len >= MINCLSIZE) {
1755                     MCLGET(n, M_TRYWAIT);
1756                 }
1757                 n->m_len = 0;
1758                 break;
1759             }
1760             pm = &m->m_next;
1761         }
1762
1763         /*
1764         * If n is non-NULL, loop on m copying data, then replace the
1765         * portion of the chain that had to be realigned.
1766         */
1767         if (n != NULL) {
1768             while (m) {
1769                 m_copyback(n, off, m->m_len, mtod(m, caddr_t));
1770                 off += m->m_len;
1771                 m = m->m_next;
1772             }
1773             m_freem(*pm);
1774             *pm = n;
1775         }
1776
1777         RPCDEBUG("leave rpcclnt_realign()");
1778 }
1779 #endif
1780
1781 static int
1782 rpcclnt_msg(p, server, msg)
1783         RPC_EXEC_CTX   p;
1784         const char     *server;
1785         char           *msg;
1786 {
1787 #ifdef __OpenBSD__
1788         tpr_t           tpr;
1789         struct proc    *pr = p;
1790
1791         if (p)
1792                 tpr = tprintf_open(p);
1793         else
1794                 tpr = NULL;
1795         tprintf(tpr, "rpc server %s: %s\n", server, msg);
1796         tprintf_close(tpr);
1797         RPC_RETURN(0);
1798 #else
1799         GIANT_REQUIRED;
1800
1801         tprintf(p ? p->td_proc : NULL, LOG_INFO,
1802                 "nfs server %s: %s\n", server, msg);
1803         RPC_RETURN(0);
1804 #endif
1805 }
1806
1807 /*
1808  * Build the RPC header and fill in the authorization info. The authorization
1809  * string argument is only used when the credentials come from outside of the
1810  * kernel (AUTH_KERB). (likewise, the ucred is only used when inside the
1811  * kernel) Returns the head of the mbuf list.
1812  */
1813 static struct mbuf    *
1814 rpcclnt_buildheader(rc, procid, mrest, mrest_len, xidp, mheadend, cred)
1815         struct rpcclnt *rc;
1816         int             procid;
1817         struct mbuf    *mrest;
1818         u_int32_t       mrest_len;
1819         int            *xidp;
1820         struct mbuf   **mheadend;
1821         struct ucred * cred;
1822 {
1823         /* register */ struct mbuf *mb;
1824         register u_int32_t *tl;
1825         /* register */ caddr_t bpos;
1826         struct mbuf *mreq, *mb2;
1827         int error;
1828
1829         MGETHDR(mb, M_TRYWAIT, MT_DATA);
1830         if (6 * RPCX_UNSIGNED >= MINCLSIZE) {
1831                 MCLGET(mb, M_TRYWAIT);
1832         } else if (6 * RPCX_UNSIGNED < MHLEN) {
1833                 MH_ALIGN(mb, 6 * RPCX_UNSIGNED);
1834         } else {
1835                 RPCDEBUG("mbuf too small");
1836                 panic("cheap bailout");
1837         }
1838         mb->m_len = 0;
1839         mreq = mb;
1840         bpos = mtod(mb, caddr_t);
1841
1842         /*
1843          * First the RPC header.
1844          */
1845         rpcm_build(tl, u_int32_t *, 6 * RPCX_UNSIGNED);
1846
1847         /* Get a new (non-zero) xid */
1848         if ((rpcclnt_xid == 0) && (rpcclnt_xid_touched == 0)) {
1849                 rpcclnt_xid = arc4random();
1850                 rpcclnt_xid_touched = 1;
1851         } else {
1852                 while ((*xidp = arc4random() % 256) == 0);
1853                 rpcclnt_xid += *xidp;
1854         }
1855
1856         /* XXX: funky... */
1857         *tl++ = *xidp = txdr_unsigned(rpcclnt_xid);
1858
1859         *tl++ = rpc_call;
1860         *tl++ = rpc_vers;
1861         *tl++ = txdr_unsigned(rc->rc_prog->prog_id);
1862         *tl++ = txdr_unsigned(rc->rc_prog->prog_version);
1863         *tl++ = txdr_unsigned(procid);
1864
1865         if ((error = rpcauth_buildheader(rc->rc_auth, cred, &mb, &bpos))) {
1866                 m_freem(mreq);
1867                 RPCDEBUG("rpcauth_buildheader failed %d", error);
1868                 return NULL;
1869         }
1870
1871         mb->m_next = mrest;
1872         *mheadend = mb;
1873         mreq->m_pkthdr.len = m_length(mreq, NULL);
1874         mreq->m_pkthdr.rcvif = NULL;
1875         return (mreq);
1876 }
1877
1878 /*
1879  * Help break down an mbuf chain by setting the first siz bytes contiguous
1880  * pointed to by returned val. This is used by the macros rpcm_dissect and
1881  * rpcm_dissecton for tough cases. (The macros use the vars. dpos and dpos2)
1882  */
1883 static int
1884 rpcm_disct(mdp, dposp, siz, left, cp2)
1885         struct mbuf   **mdp;
1886         caddr_t        *dposp;
1887         int             siz;
1888         int             left;
1889         caddr_t        *cp2;
1890 {
1891         struct mbuf    *mp, *mp2;
1892         int             siz2, xfer;
1893         caddr_t         p;
1894
1895         mp = *mdp;
1896         while (left == 0) {
1897                 *mdp = mp = mp->m_next;
1898                 if (mp == NULL)
1899                         RPC_RETURN(EBADRPC);
1900                 left = mp->m_len;
1901                 *dposp = mtod(mp, caddr_t);
1902         }
1903         if (left >= siz) {
1904                 *cp2 = *dposp;
1905                 *dposp += siz;
1906         } else if (mp->m_next == NULL) {
1907                 RPC_RETURN(EBADRPC);
1908         } else if (siz > MHLEN) {
1909                 panic("rpc S too big");
1910         } else {
1911                 MGET(mp2, M_TRYWAIT, MT_DATA);
1912                 mp2->m_next = mp->m_next;
1913                 mp->m_next = mp2;
1914                 mp->m_len -= left;
1915                 mp = mp2;
1916                 *cp2 = p = mtod(mp, caddr_t);
1917                 bcopy(*dposp, p, left); /* Copy what was left */
1918                 siz2 = siz - left;
1919                 p += left;
1920                 mp2 = mp->m_next;
1921                 /* Loop around copying up the siz2 bytes */
1922                 while (siz2 > 0) {
1923                         if (mp2 == NULL)
1924                                 RPC_RETURN(EBADRPC);
1925                         xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
1926                         if (xfer > 0) {
1927                                 bcopy(mtod(mp2, caddr_t), p, xfer);
1928                                 RPCMADV(mp2, xfer);
1929                                 mp2->m_len -= xfer;
1930                                 p += xfer;
1931                                 siz2 -= xfer;
1932                         }
1933                         if (siz2 > 0)
1934                                 mp2 = mp2->m_next;
1935                 }
1936                 mp->m_len = siz;
1937                 *mdp = mp2;
1938                 *dposp = mtod(mp2, caddr_t);
1939         }
1940         RPC_RETURN(0);
1941 }
1942
1943
1944
1945 static u_int32_t
1946 rpcclnt_proct(rpc, procid)
1947         struct rpcclnt *rpc;
1948         u_int32_t       procid;
1949 {
1950         if (rpc->rc_proctlen != 0 && rpc->rc_proct != NULL &&
1951             procid < rpc->rc_proctlen) {
1952                 return rpc->rc_proct[procid];
1953         }
1954         return (0);
1955 }
1956
1957 static int
1958 rpc_adv(mdp, dposp, offs, left)
1959         struct mbuf   **mdp;
1960         caddr_t        *dposp;
1961         int             offs;
1962         int             left;
1963 {
1964         struct mbuf    *m;
1965         int             s;
1966
1967         m = *mdp;
1968         s = left;
1969         while (s < offs) {
1970                 offs -= s;
1971                 m = m->m_next;
1972                 if (m == NULL)
1973                         RPC_RETURN(EBADRPC);
1974                 s = m->m_len;
1975         }
1976         *mdp = m;
1977         *dposp = mtod(m, caddr_t) + offs;
1978         RPC_RETURN(0);
1979 }
1980
1981 int
1982 rpcclnt_cancelreqs(rpc)
1983         struct rpcclnt *rpc;
1984 {
1985         struct rpctask *task;
1986         int             i, s;
1987
1988         s = splnet();
1989         TAILQ_FOREACH(task, &rpctask_q, r_chain) {
1990                 if (rpc != task->r_rpcclnt || task->r_mrep != NULL ||
1991                     (task->r_flags & R_SOFTTERM))
1992                         continue;
1993                 rpcclnt_softterm(task);
1994         }
1995         splx(s);
1996
1997         for (i = 0; i < 30; i++) {
1998                 s = splnet();
1999                 TAILQ_FOREACH(task, &rpctask_q, r_chain) {
2000                         if (rpc == task->r_rpcclnt)
2001                                 break;
2002                 }
2003                 splx(s);
2004                 if (task == NULL)
2005                         return (0);
2006                 tsleep(&lbolt, PSOCK, "nfscancel", 0);
2007         }
2008         return (EBUSY);
2009 }
2010
2011 static void
2012 rpcclnt_softterm(struct rpctask * task)
2013 {
2014         task->r_flags |= R_SOFTTERM;
2015         if (task->r_flags & R_SENT) {
2016                 task->r_rpcclnt->rc_sent -= RPC_CWNDSCALE;
2017                 task->r_flags &= ~R_SENT;
2018         }
2019 }
2020
2021
2022 #ifndef __OpenBSD__
2023 /* called by rpcclnt_get() */
2024 void
2025 rpcclnt_create(struct rpcclnt ** rpc)
2026 {
2027         MALLOC(*rpc, struct rpcclnt *, sizeof(struct rpcclnt), M_RPC, M_WAITOK | M_ZERO);
2028 }
2029
2030 /* called by rpcclnt_put() */
2031 void
2032 rpcclnt_destroy(struct rpcclnt * rpc)
2033 {
2034         if (rpc != NULL) {
2035                 FREE(rpc, M_RPC);
2036         } else {
2037                 RPCDEBUG("attempting to free a NULL rpcclnt (not dereferenced)");
2038         }
2039 }
2040 #endif                          /* !__OpenBSD__ */
2041
2042
2043 /* XXX: add a lock around the auth structure in struct rpcclnt and make this
2044  * call safe for calling durring a connection */
2045 static int
2046 rpcauth_buildheader(struct rpc_auth * auth, struct ucred * cred, struct mbuf ** mhdr, caddr_t * bp)
2047 {
2048         size_t authsiz, verfsiz;
2049         uint32_t mlen, grpsiz;
2050         register struct mbuf *mb, *mb2;
2051         caddr_t bpos;
2052         register u_int32_t *tl;
2053         register int i;
2054
2055         if (auth == NULL || mhdr == NULL)
2056           return EFAULT;
2057
2058         switch (auth->auth_type) {
2059         case RPCAUTH_NULL:
2060                 authsiz = 0;
2061                 verfsiz = 0;
2062         break;
2063         case RPCAUTH_UNIX:
2064                 authsiz = (5 + cred->cr_ngroups) * RPCX_UNSIGNED;
2065                 verfsiz = 0;
2066         break;
2067         default:
2068                 return EPROTONOSUPPORT;
2069         break;
2070         };
2071
2072         mlen = rpcm_rndup(authsiz) + rpcm_rndup(verfsiz) + 4 * RPCX_UNSIGNED;
2073
2074         mb = *mhdr;
2075         bpos = *bp;
2076
2077         rpcm_build(tl, u_int32_t *, mlen);
2078
2079         *bp = bpos;
2080         *mhdr = mb;
2081
2082         *tl++ = txdr_unsigned(auth->auth_type);
2083         *tl++ = txdr_unsigned(authsiz);
2084         switch (auth->auth_type) {
2085         case RPCAUTH_UNIX:
2086                 *tl++ = 0;
2087                 *tl++ = 0; 
2088
2089                 *tl++ = txdr_unsigned(cred->cr_uid);
2090                 *tl++ = txdr_unsigned(cred->cr_groups[0]);
2091                 grpsiz = cred->cr_ngroups;
2092                 *tl++ = txdr_unsigned(grpsiz);
2093                 /* XXX: groups[0] is already sent... */
2094                 for (i = 0 ; i < grpsiz ; i++) {
2095                          *tl++ = txdr_unsigned(cred->cr_groups[i]);
2096                 }
2097
2098                 /* null verification header */
2099                 *tl++ = txdr_unsigned(RPCAUTH_NULL);
2100                 *tl++ = 0;
2101         break;
2102         case RPCAUTH_NULL:
2103                 /* just a null verf header */
2104                 *tl++ = txdr_unsigned(RPCAUTH_NULL);
2105                 *tl = 0;
2106         break;
2107         default:
2108                 panic("inconsistent rpc auth type");
2109         break;
2110         }
2111
2112         return 0;
2113 }