]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/rpc/rpcclnt.c
This commit was generated by cvs2svn to compensate for changes in r161863,
[FreeBSD/FreeBSD.git] / sys / rpc / rpcclnt.c
1 /* $FreeBSD$ */
2 /* $Id: rpcclnt.c,v 1.9 2003/11/05 14:59:03 rees Exp $ */
3
4 /*-
5  * copyright (c) 2003
6  * the regents of the university of michigan
7  * all rights reserved
8  * 
9  * permission is granted to use, copy, create derivative works and redistribute
10  * this software and such derivative works for any purpose, so long as the name
11  * of the university of michigan is not used in any advertising or publicity
12  * pertaining to the use or distribution of this software without specific,
13  * written prior authorization.  if the above copyright notice or any other
14  * identification of the university of michigan is included in any copy of any
15  * portion of this software, then the disclaimer below must also be included.
16  * 
17  * this software is provided as is, without representation from the university
18  * of michigan as to its fitness for any purpose, and without warranty by the
19  * university of michigan of any kind, either express or implied, including
20  * without limitation the implied warranties of merchantability and fitness for
21  * a particular purpose. the regents of the university of michigan shall not be
22  * liable for any damages, including special, indirect, incidental, or
23  * consequential damages, with respect to any claim arising out of or in
24  * connection with the use of the software, even if it has been or is hereafter
25  * advised of the possibility of such damages.
26  */
27
28 /*-
29  * Copyright (c) 1989, 1991, 1993, 1995 The Regents of the University of
30  * California.  All rights reserved.
31  * 
32  * This code is derived from software contributed to Berkeley by Rick Macklem at
33  * The University of Guelph.
34  * 
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions are
37  * met: 1. Redistributions of source code must retain the above copyright
38  * notice, this list of conditions and the following disclaimer. 2.
39  * Redistributions in binary form must reproduce the above copyright notice,
40  * this list of conditions and the following disclaimer in the documentation
41  * and/or other materials provided with the distribution. 3. All advertising
42  * materials mentioning features or use of this software must display the
43  * following acknowledgement: This product includes software developed by the
44  * University of California, Berkeley and its contributors. 4. Neither the
45  * name of the University nor the names of its contributors may be used to
46  * endorse or promote products derived from this software without specific
47  * prior written permission.
48  * 
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
50  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
51  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
52  * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
53  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
55  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
56  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  * 
61  * @(#)nfs_socket.c     8.5 (Berkeley) 3/30/95
62  */
63
/* XXX: kill ugly debug strings */
/*
 * XXX: get rid of proct, as it is not even being used... (or keep it so
 *      v{2,3} can run, but clean it up!)
 */
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/proc.h>
71 #include <sys/mount.h>
72 #include <sys/kernel.h>
73 #include <sys/mbuf.h>
74 #include <sys/syslog.h>
75 #include <sys/malloc.h>
76 #include <sys/uio.h>
77 #include <sys/lock.h>
78 #include <sys/signalvar.h>
79 #include <sys/sysent.h>
80 #include <sys/syscall.h>
81 #include <sys/sysctl.h>
82
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/mutex.h>
88
89 #include <netinet/in.h>
90 #include <netinet/tcp.h>
91
92 #include <nfs/rpcv2.h>
93
94 #include <rpc/rpcm_subs.h>
95 #include <rpc/rpcclnt.h>
96
97 /* memory management */
98 #ifdef __OpenBSD__
99 struct pool     rpctask_pool;
100 struct pool     rpcclnt_pool;
101 #define RPCTASKPOOL_LWM 10
102 #define RPCTASKPOOL_HWM 40
103 #else
104 static          MALLOC_DEFINE(M_RPC, "rpcclnt", "rpc state");
105 #endif
106
/*
 * Report the value about to be returned through RPCDEBUG() and then return
 * it from the enclosing function.  The argument is expanded twice, so it
 * must be free of side effects.
 */
#define RPC_RETURN(X) \
        do { RPCDEBUG("returning %d", (X)); return (X); } while (0)
108
/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram. Use the mean
 * and mean deviation of rtt for the appropriate type of rpc for the frequent
 * rpcs and a default for the others. The justification for doing "other"
 * this way is that these rpcs happen so infrequently that timer est. would
 * probably be stale. Also, since many of these rpcs are non-idempotent, a
 * conservative timeout is desired.
 *
 *      getattr, lookup - A+2D
 *      read, write     - A+4D
 *      other           - rc_timeo
 *
 * "n" is the rpcclnt and "t" the timer class: t == 0 selects rc_timeo,
 * t of 1 or 2 uses the first formula below and any larger t the second.
 * Both formulas read slot (t - 1) of rc_srtt[] and rc_sdrtt[].
 * NOTE(review): the A+2D / A+4D reading presumes rc_srtt is kept scaled by
 * 8 and rc_sdrtt by 4, as in the NFS code this was derived from -- confirm.
 *
 * Every use of a macro argument is parenthesized so that an expression
 * argument (e.g. "c ? 2 : 1") indexes the intended slot.
 */
#define RPC_RTO(n, t) \
        ((t) == 0 ? (n)->rc_timeo : \
         ((t) < 3 ? \
          (((((n)->rc_srtt[(t) - 1] + 3) >> 2) + \
            (n)->rc_sdrtt[(t) - 1] + 1) >> 1) : \
          ((((n)->rc_srtt[(t) - 1] + 7) >> 3) + \
           (n)->rc_sdrtt[(t) - 1] + 1)))

/*
 * Smoothed rtt / rtt deviation slot for the procedure of request "r".  The
 * slot is chosen by rpcclnt_proct(); both macros expand to lvalues.
 * NOTE(review): if rpcclnt_proct() can return 0 the "- 1" wraps the
 * unsigned index -- verify that callers exclude that case.
 */
#define RPC_SRTT(s, r) \
        ((r)->r_rpcclnt->rc_srtt[rpcclnt_proct((s), (r)->r_procnum) - 1])

#define RPC_SDRTT(s, r) \
        ((r)->r_rpcclnt->rc_sdrtt[rpcclnt_proct((s), (r)->r_procnum) - 1])
129
130
/*
 * A congestion window for outstanding rpcs is maintained per mount point.
 * Its size is adjusted in roughly the way described for TCP in: Van
 * Jacobson, Congestion avoidance and Control, In "Proceedings of SIGCOMM
 * '88". ACM, August 1988.  The cwnd size is chopped in half on a retransmit
 * timeout and incremented by 1/cwnd when each rpc reply is received and a
 * full cwnd of rpcs is in progress.  (The sent count and cwnd are scaled
 * for integer arithmetic.)  Variants of "slow start" were tried and were
 * found to be too much of a performance hit (ave. rtt 3 times larger), I
 * suspect due to the large rtt that nfs rpcs have.
 */
#define RPC_CWNDSCALE   256
#define RPC_MAXCWND     (32 * RPC_CWNDSCALE)

/* Table of powers of two, 2 through 256. */
static const int rpcclnt_backoff[8] = {
        2, 4, 8, 16, 32, 64, 128, 256
};
145
/*
 * XXX ugly debug strings.
 *
 * Three lookup tables of human-readable messages.  Each entry is placed by
 * explicit index so the position of every message is visible at a glance.
 */
#define RPC_ERRSTR_ACCEPTED_SIZE 6
char *rpc_errstr_accepted[RPC_ERRSTR_ACCEPTED_SIZE] = {
        [0] = "",                       /* no good message... */
        [1] = "remote server hasn't exported program.",
        [2] = "remote server can't support version number.",
        [3] = "program can't support procedure.",
        [4] = "procedure can't decode params.",
        [5] = "remote error.  remote side memory allocation failure?"
};

char *rpc_errstr_denied[2] = {
        [0] = "remote server doesnt support rpc version 2!",
        [1] = "remote server authentication error."
};

#define RPC_ERRSTR_AUTH_SIZE 6
char *rpc_errstr_auth[RPC_ERRSTR_AUTH_SIZE] = {
        [0] = "",
        [1] = "auth error: bad credential (seal broken).",
        [2] = "auth error: client must begin new session.",
        [3] = "auth error: bad verifier (seal broken).",
        [4] = "auth error: verifier expired or replayed.",
        [5] = "auth error: rejected for security reasons."
};
171
172 /*
173  * Static data, mostly RPC constants in XDR form
174  */
175 static u_int32_t rpc_reply, rpc_call, rpc_vers;
176
177 /*
178  * rpc_msgdenied, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted,
179  * rpc_autherr, rpc_auth_kerb;
180  */
181
182 static u_int32_t rpcclnt_xid = 0;
183 static u_int32_t rpcclnt_xid_touched = 0;
184 struct rpcstats rpcstats;
185 int      rpcclnt_ticks;
186
187 SYSCTL_NODE(_kern, OID_AUTO, rpc, CTLFLAG_RD, 0, "RPC Subsystem");
188
189 SYSCTL_UINT(_kern_rpc, OID_AUTO, retries, CTLFLAG_RD, &rpcstats.rpcretries, 0, "retries");
190 SYSCTL_UINT(_kern_rpc, OID_AUTO, request, CTLFLAG_RD, &rpcstats.rpcrequests, 0, "request");
191 SYSCTL_UINT(_kern_rpc, OID_AUTO, timeouts, CTLFLAG_RD, &rpcstats.rpctimeouts, 0, "timeouts");
192 SYSCTL_UINT(_kern_rpc, OID_AUTO, unexpected, CTLFLAG_RD, &rpcstats.rpcunexpected, 0, "unexpected");
193 SYSCTL_UINT(_kern_rpc, OID_AUTO, invalid, CTLFLAG_RD, &rpcstats.rpcinvalid, 0, "invalid");
194
195
196 #ifdef RPCCLNT_DEBUG
197 int             rpcdebugon = 0;
198 SYSCTL_UINT(_kern_rpc, OID_AUTO, debug_on, CTLFLAG_RW, &rpcdebugon, 0, "RPC Debug messages");
199 #endif
200
201 /*
202  * Queue head for rpctask's
203  */
204 static 
205 TAILQ_HEAD(, rpctask) rpctask_q;
206 struct callout  rpcclnt_callout;
207
208 #ifdef __OpenBSD__
209 static int             rpcclnt_send(struct socket *, struct mbuf *, struct mbuf *, struct rpctask *);
210 static int             rpcclnt_receive(struct rpctask *, struct mbuf **, struct mbuf **, RPC_EXEC_CTX);
211 #else
212 static int             rpcclnt_send(struct socket *, struct sockaddr *, struct mbuf *, struct rpctask *);
213 static int             rpcclnt_receive(struct rpctask *, struct sockaddr **, struct mbuf **, RPC_EXEC_CTX);
214 #endif
215
216 static int             rpcclnt_msg(RPC_EXEC_CTX, const char *, char *);
217
218 static int             rpcclnt_reply(struct rpctask *, RPC_EXEC_CTX);
219 static void            rpcclnt_timer(void *);
220 static int             rpcclnt_sndlock(int *, struct rpctask *);
221 static void            rpcclnt_sndunlock(int *);
222 static int             rpcclnt_rcvlock(struct rpctask *);
223 static void            rpcclnt_rcvunlock(int *);
224 #if 0
225 void            rpcclnt_realign(struct mbuf *, int);
226 #else
227 static void     rpcclnt_realign(struct mbuf **, int);
228 #endif
229
230 static struct mbuf    *rpcclnt_buildheader(struct rpcclnt *, int, struct mbuf *, u_int32_t, int *, struct mbuf **, struct ucred *);
231 static int             rpcm_disct(struct mbuf **, caddr_t *, int, int, caddr_t *);
232 static u_int32_t       rpcclnt_proct(struct rpcclnt *, u_int32_t);
233 static int             rpc_adv(struct mbuf **, caddr_t *, int, int);
234 static void     rpcclnt_softterm(struct rpctask * task);
235
236 static int rpcauth_buildheader(struct rpc_auth * auth, struct ucred *, struct mbuf **, caddr_t *);
237
238 void
239 rpcclnt_init(void)
240 {
241 #ifdef __OpenBSD__
242         static struct timeout rpcclnt_timer_to;
243 #endif
244
245         rpcclnt_ticks = (hz * RPC_TICKINTVL + 500) / 1000;
246         if (rpcclnt_ticks < 1)
247                 rpcclnt_ticks = 1;
248         rpcstats.rpcretries = 0;
249         rpcstats.rpcrequests = 0;
250         rpcstats.rpctimeouts = 0;
251         rpcstats.rpcunexpected = 0;
252         rpcstats.rpcinvalid = 0;
253
254         /*
255          * rpc constants how about actually using more than one of these!
256          */
257
258         rpc_reply = txdr_unsigned(RPC_REPLY);
259         rpc_vers = txdr_unsigned(RPC_VER2);
260         rpc_call = txdr_unsigned(RPC_CALL);
261 #if 0
262         rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
263         rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
264         rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
265         rpc_autherr = txdr_unsigned(RPC_AUTHERR);
266         rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
267         rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
268 #endif
269
270         /* initialize rpctask queue */
271         TAILQ_INIT(&rpctask_q);
272
273 #ifdef __OpenBSD__
274         /* initialize pools */
275         pool_init(&rpctask_pool, sizeof(struct rpctask), 0, 0, RPCTASKPOOL_LWM,
276                   "rpctask_p", NULL);
277         pool_setlowat(&rpctask_pool, RPCTASKPOOL_LWM);
278         pool_sethiwat(&rpctask_pool, RPCTASKPOOL_HWM);
279
280         pool_init(&rpcclnt_pool, sizeof(struct rpcclnt), 0, 0, 1, "rpcclnt_p", NULL);
281
282         /* initialize timers */
283         timeout_set(&rpcclnt_timer_to, rpcclnt_timer, &rpcclnt_timer_to);
284         rpcclnt_timer(&rpcclnt_timer_to);
285 #else /* !__OpenBSD__ */
286         callout_init(&rpcclnt_callout, 0);
287 #endif /* !__OpenBSD__ */
288
289         RPCDEBUG("rpc initialed");
290
291         return;
292 }
293
294 void
295 rpcclnt_uninit(void)
296 {
297         RPCDEBUG("uninit");
298         /* XXX delete sysctl variables? */
299         callout_stop(&rpcclnt_callout);
300 }
301
302 int
303 rpcclnt_setup(clnt, program, addr, sotype, soproto, auth, max_read_size, max_write_size, flags)
304     struct rpcclnt * clnt;
305     struct rpc_program * program;
306     struct sockaddr * addr;
307     int sotype;
308     int soproto;
309     struct rpc_auth * auth;
310     int max_read_size;
311     int max_write_size;
312     int flags;
313 {
314         if (clnt == NULL || program == NULL || addr == NULL || auth == NULL)
315           RPC_RETURN (EFAULT);
316
317         if (program->prog_name == NULL)
318           RPC_RETURN (EFAULT);
319         clnt->rc_prog = program;
320
321         clnt->rc_name = addr;
322         clnt->rc_sotype = sotype;
323         clnt->rc_soproto = soproto;
324         clnt->rc_auth = auth;
325         clnt->rc_rsize = max_read_size;
326         clnt->rc_wsize = max_write_size;
327         clnt->rc_flag = flags;
328
329         clnt->rc_proctlen = 0;
330         clnt->rc_proct = NULL;
331
332         RPC_RETURN (0);
333 }
334
335 /*
336  * Initialize sockets and congestion for a new RPC connection. We do not free
337  * the sockaddr if error.
338  */
339 int
340 rpcclnt_connect(rpc, td)
341         struct rpcclnt *rpc;
342         RPC_EXEC_CTX td;
343 {
344         struct socket  *so;
345         int             s, error, rcvreserve, sndreserve;
346         struct sockaddr *saddr;
347
348 #ifdef __OpenBSD__
349         struct sockaddr_in *sin;
350         struct mbuf    *m;
351 #else
352         struct sockaddr_in sin;
353
354         int             soarg;
355         struct sockopt  opt;
356 #endif
357
358         if (rpc == NULL) {
359                 RPCDEBUG("no rpcclnt struct!\n");
360                 RPC_RETURN(EFAULT);
361         }
362
363         GIANT_REQUIRED;         /* XXX until socket locking done */
364
365         /* create the socket */
366         rpc->rc_so = NULL;
367
368         saddr = rpc->rc_name;
369
370         error = socreate(saddr->sa_family, &rpc->rc_so, rpc->rc_sotype,
371                          rpc->rc_soproto, td->td_ucred, td);
372
373         if (error) {
374                 RPCDEBUG("error %d in socreate()", error);
375                 RPC_RETURN(error);
376         }
377         so = rpc->rc_so;
378         rpc->rc_soflags = so->so_proto->pr_flags;
379
380         /*
381          * Some servers require that the client port be a reserved port
382          * number. We always allocate a reserved port, as this prevents
383          * filehandle disclosure through UDP port capture.
384          */
385         if (saddr->sa_family == AF_INET) {
386 #ifdef __OpenBSD__
387                 struct mbuf    *mopt;
388                 int            *ip;
389 #endif
390
391 #ifdef __OpenBSD__
392                 MGET(mopt, M_TRYWAIT, MT_SOOPTS);
393                 mopt->m_len = sizeof(int);
394                 ip = mtod(mopt, int *);
395                 *ip = IP_PORTRANGE_LOW;
396
397                 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
398 #else
399                 soarg = IP_PORTRANGE_LOW;
400                 bzero(&opt, sizeof(struct sockopt));
401                 opt.sopt_dir = SOPT_SET;
402                 opt.sopt_level = IPPROTO_IP;
403                 opt.sopt_name = IP_PORTRANGE;
404                 opt.sopt_val = &soarg;
405                 opt.sopt_valsize = sizeof(soarg);
406
407                 error = sosetopt(so, &opt);
408 #endif
409                 if (error)
410                         goto bad;
411
412 #ifdef __OpenBSD__
413                 MGET(m, M_TRYWAIT, MT_SONAME);
414                 sin = mtod(m, struct sockaddr_in *);
415                 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
416                 sin->sin_family = AF_INET;
417                 sin->sin_addr.s_addr = INADDR_ANY;
418                 sin->sin_port = htons(0);
419                 error = sobind(so, m);
420                 m_freem(m);
421 #else
422                 sin.sin_len = sizeof(struct sockaddr_in);
423                 sin.sin_family = AF_INET;
424                 sin.sin_addr.s_addr = INADDR_ANY;
425                 sin.sin_port = htons(0);
426                 /*
427                  * &thread0 gives us root credentials to ensure sobind
428                  * will give us a reserved ephemeral port.
429                  */
430                 error = sobind(so, (struct sockaddr *) & sin, &thread0);
431 #endif
432                 if (error)
433                         goto bad;
434
435 #ifdef __OpenBSD__
436                 MGET(mopt, M_TRYWAIT, MT_SOOPTS);
437                 mopt->m_len = sizeof(int);
438                 ip = mtod(mopt, int *);
439                 *ip = IP_PORTRANGE_DEFAULT;
440                 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
441 #else
442                 soarg = IP_PORTRANGE_DEFAULT;
443                 bzero(&opt, sizeof(struct sockopt));
444                 opt.sopt_dir = SOPT_SET;
445                 opt.sopt_level = IPPROTO_IP;
446                 opt.sopt_name = IP_PORTRANGE;
447                 opt.sopt_val = &soarg;
448                 opt.sopt_valsize = sizeof(soarg);
449                 error = sosetopt(so, &opt);
450 #endif
451                 if (error)
452                         goto bad;
453         }
454         /*
455          * Protocols that do not require connections may be optionally left
456          * unconnected for servers that reply from a port other than
457          * NFS_PORT.
458          */
459         if (rpc->rc_flag & RPCCLNT_NOCONN) {
460                 if (rpc->rc_soflags & PR_CONNREQUIRED) {
461                         error = ENOTCONN;
462                         goto bad;
463                 }
464         } else {
465                 error = soconnect(so, saddr, td);
466                 if (error)
467                         goto bad;
468
469                 /*
470                  * Wait for the connection to complete. Cribbed from the
471                  * connect system call but with the wait timing out so that
472                  * interruptible mounts don't hang here for a long time.
473                  */
474 #ifdef __OpenBSD__
475                 s = splsoftnet();
476 #else
477                 s = splnet();
478 #endif
479                 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
480                         (void)tsleep((caddr_t) & so->so_timeo, PSOCK,
481                                      "rpc", 2 * hz);
482
483                         /*
484                          * XXX needs to catch interrupt signals. something
485                          * like this: if ((so->so_state & SS_ISCONNECTING) &&
486                          * so->so_error == 0 && rep && (error =
487                          * nfs_sigintr(nmp, rep, rep->r_td)) != 0) {
488                          * so->so_state &= ~SS_ISCONNECTING; splx(s); goto
489                          * bad; }
490                          */
491                 }
492                 if (so->so_error) {
493                         error = so->so_error;
494                         so->so_error = 0;
495                         splx(s);
496                         goto bad;
497                 }
498                 splx(s);
499         }
500         if (rpc->rc_flag & (RPCCLNT_SOFT | RPCCLNT_INT)) {
501                 so->so_rcv.sb_timeo = (5 * hz);
502                 so->so_snd.sb_timeo = (5 * hz);
503         } else {
504                 so->so_rcv.sb_timeo = 0;
505                 so->so_snd.sb_timeo = 0;
506         }
507
508
509         if (rpc->rc_sotype == SOCK_DGRAM) {
510                 sndreserve = rpc->rc_wsize + RPC_MAXPKTHDR;
511                 rcvreserve = rpc->rc_rsize + RPC_MAXPKTHDR;
512         } else if (rpc->rc_sotype == SOCK_SEQPACKET) {
513                 sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR) * 2;
514                 rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR) * 2;
515         } else {
516                 if (rpc->rc_sotype != SOCK_STREAM)
517                         panic("rpcclnt_connect() bad sotype");
518                 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
519 #ifdef __OpenBSD__
520                         MGET(m, M_TRYWAIT, MT_SOOPTS);
521                         *mtod(m, int32_t *) = 1;
522                         m->m_len = sizeof(int32_t);
523                         sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
524 #else
525                         soarg = 1;
526
527                         bzero(&opt, sizeof(struct sockopt));
528                         opt.sopt_dir = SOPT_SET;
529                         opt.sopt_level = SOL_SOCKET;
530                         opt.sopt_name = SO_KEEPALIVE;
531                         opt.sopt_val = &soarg;
532                         opt.sopt_valsize = sizeof(soarg);
533                         sosetopt(so, &opt);
534 #endif
535                 }
536                 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
537 #ifdef __OpenBSD__
538                         MGET(m, M_TRYWAIT, MT_SOOPTS);
539                         *mtod(m, int32_t *) = 1;
540                         m->m_len = sizeof(int32_t);
541                         sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
542 #else
543                         soarg = 1;
544
545                         bzero(&opt, sizeof(struct sockopt));
546                         opt.sopt_dir = SOPT_SET;
547                         opt.sopt_level = IPPROTO_TCP;
548                         opt.sopt_name = TCP_NODELAY;
549                         opt.sopt_val = &soarg;
550                         opt.sopt_valsize = sizeof(soarg);
551                         sosetopt(so, &opt);
552 #endif
553                 }
554                 sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR +
555                               sizeof(u_int32_t)) * 2;
556                 rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR +
557                               sizeof(u_int32_t)) * 2;
558         }
559         error = soreserve(so, sndreserve, rcvreserve);
560         if (error)
561                 goto bad;
562         so->so_rcv.sb_flags |= SB_NOINTR;
563         so->so_snd.sb_flags |= SB_NOINTR;
564
565         /* Initialize other non-zero congestion variables */
566         rpc->rc_srtt[0] = rpc->rc_srtt[1] = rpc->rc_srtt[2] =
567                  rpc->rc_srtt[3] = (RPC_TIMEO << 3);
568         rpc->rc_sdrtt[0] = rpc->rc_sdrtt[1] = rpc->rc_sdrtt[2] =
569                 rpc->rc_sdrtt[3] = 0;
570         rpc->rc_cwnd = RPC_MAXCWND / 2; /* Initial send window */
571         rpc->rc_sent = 0;
572         rpc->rc_timeouts = 0;
573         RPC_RETURN(0);
574
575 bad:
576         rpcclnt_disconnect(rpc);
577         RPC_RETURN(error);
578 }
579
580
581 /*
582  * Reconnect routine:
583  * Called when a connection is broken on a reliable protocol.
584  * - clean up the old socket
585  * - rpcclnt_connect() again
586  * - set R_MUSTRESEND for all outstanding requests on mount point
587  * If this fails the mount point is DEAD!
588  * nb: Must be called with the rpcclnt_sndlock() set on the mount point.
589  */
590 int
591 rpcclnt_reconnect(rep, td)
592         struct rpctask *rep;
593         RPC_EXEC_CTX td;
594 {
595         struct rpctask *rp;
596         struct rpcclnt *rpc = rep->r_rpcclnt;
597         int             error;
598
599         rpcclnt_disconnect(rpc);
600         while ((error = rpcclnt_connect(rpc, td)) != 0) {
601                 if (error == EINTR || error == ERESTART)
602                         RPC_RETURN(EINTR);
603                 tsleep(&lbolt, PSOCK, "rpccon", 0);
604         }
605
606         /*
607          * Loop through outstanding request list and fix up all requests on
608          * old socket.
609          */
610         for (rp = TAILQ_FIRST(&rpctask_q); rp != NULL;
611              rp = TAILQ_NEXT(rp, r_chain)) {
612                 if (rp->r_rpcclnt == rpc)
613                         rp->r_flags |= R_MUSTRESEND;
614         }
615         RPC_RETURN(0);
616 }
617
618 /*
619  * RPC transport disconnect. Clean up and unlink.
620  */
621 void
622 rpcclnt_disconnect(rpc)
623         struct rpcclnt *rpc;
624 {
625         struct socket  *so;
626
627         GIANT_REQUIRED;         /* XXX until socket locking done */
628
629         if (rpc->rc_so) {
630                 so = rpc->rc_so;
631                 rpc->rc_so = NULL;
632                 soshutdown(so, 2);
633                 soclose(so);
634         }
635 }
636
637 void
638 rpcclnt_safedisconnect(struct rpcclnt * rpc)
639 {
640         struct rpctask  dummytask;
641
642         bzero(&dummytask, sizeof(dummytask));
643         dummytask.r_rpcclnt = rpc;
644         rpcclnt_rcvlock(&dummytask);
645         rpcclnt_disconnect(rpc);
646         rpcclnt_rcvunlock(&rpc->rc_flag);
647 }
648
649 /*
650  * This is the rpc send routine. For connection based socket types, it
651  * must be called with an rpcclnt_sndlock() on the socket.
652  * "rep == NULL" indicates that it has been called from a server.
653  * For the client side:
654  * - return EINTR if the RPC is terminated, 0 otherwise
655  * - set R_MUSTRESEND if the send fails for any reason
656  * - do any cleanup required by recoverable socket errors (?)
657  * For the server side:
658  * - return EINTR or ERESTART if interrupted by a signal
659  * - return EPIPE if a connection is lost for connection based sockets (TCP...)
660  * - do any cleanup required by recoverable socket errors (?)
661  */
662 static int
663 rpcclnt_send(so, nam, top, rep)
664         struct socket  *so;
665 #ifdef __OpenBSD__
666         struct mbuf    *nam;
667 #else
668         struct sockaddr *nam;
669 #endif
670         struct mbuf    *top;
671         struct rpctask *rep;
672 {
673 #ifdef __OpenBSD__
674         struct mbuf    *sendnam;
675 #else
676         struct sockaddr *sendnam;
677         struct thread  *td = curthread;
678 #endif
679         int error, soflags, flags;
680
681         GIANT_REQUIRED;         /* XXX until socket locking done */
682
683         if (rep) {
684                 if (rep->r_flags & R_SOFTTERM) {
685                         m_freem(top);
686                         RPC_RETURN(EINTR);
687                 }
688                 if ((so = rep->r_rpcclnt->rc_so) == NULL) {
689                         rep->r_flags |= R_MUSTRESEND;
690                         m_freem(top);
691                         RPC_RETURN(0);
692                 }
693                 rep->r_flags &= ~R_MUSTRESEND;
694                 soflags = rep->r_rpcclnt->rc_soflags;
695         } else
696                 soflags = so->so_proto->pr_flags;
697
698         if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
699                 sendnam = NULL;
700         else
701                 sendnam = nam;
702
703         if (so->so_type == SOCK_SEQPACKET)
704                 flags = MSG_EOR;
705         else
706                 flags = 0;
707
708         error = sosend(so, sendnam, NULL, top, NULL, flags, td);
709
710         if (error) {
711                 if (rep) {
712                         log(LOG_INFO, "rpc send error %d for service %s\n", error,
713                             rep->r_rpcclnt->rc_prog->prog_name);
714                         /*
715                          * Deal with errors for the client side.
716                          */
717                         if (rep->r_flags & R_SOFTTERM)
718                                 error = EINTR;
719                         else
720                                 rep->r_flags |= R_MUSTRESEND;
721                 } else
722                         log(LOG_INFO, "rpc service send error %d\n", error);
723
724                 /*
725                  * Handle any recoverable (soft) socket errors here.
726                  */
727                 if (error != EINTR && error != ERESTART &&
728                     error != EWOULDBLOCK && error != EPIPE)
729                         error = 0;
730         }
731         RPC_RETURN(error);
732 }
733
734 /*
735  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all done by
736  * soreceive(), but for SOCK_STREAM we must deal with the Record Mark and
737  * consolidate the data into a new mbuf list. nb: Sometimes TCP passes the
738  * data up to soreceive() in long lists of small mbufs. For SOCK_STREAM we
739  * must be very careful to read an entire record once we have read any of it,
740  * even if the system call has been interrupted.
741  */
742 static int
743 rpcclnt_receive(rep, aname, mp, td)
744         struct rpctask *rep;
745 #ifdef __OpenBSD__
746         struct mbuf   **aname;
747 #else
748         struct sockaddr **aname;
749 #endif
750         struct mbuf   **mp;
751         RPC_EXEC_CTX  td;
752 {
753         struct socket  *so;
754         struct uio      auio;
755         struct iovec    aio;
756         struct mbuf    *m;
757         struct mbuf    *control;
758         u_int32_t       len;
759 #ifdef __OpenBSD__
760         struct mbuf   **getnam;
761 #else
762         struct sockaddr **getnam;
763 #endif
764         int error, sotype, rcvflg;
765
766         GIANT_REQUIRED;         /* XXX until socket locking done */
767
768         /*
769          * Set up arguments for soreceive()
770          */
771         *mp = NULL;
772         *aname = NULL;
773         sotype = rep->r_rpcclnt->rc_sotype;
774
775         /*
776          * For reliable protocols, lock against other senders/receivers in
777          * case a reconnect is necessary. For SOCK_STREAM, first get the
778          * Record Mark to find out how much more there is to get. We must
779          * lock the socket against other receivers until we have an entire
780          * rpc request/reply.
781          */
782         if (sotype != SOCK_DGRAM) {
783                 error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
784                 if (error)
785                         RPC_RETURN(error);
786 tryagain:
787                 /*
788                  * Check for fatal errors and resending request.
789                  */
790                 /*
791                  * Ugh: If a reconnect attempt just happened, rc_so would
792                  * have changed. NULL indicates a failed attempt that has
793                  * essentially shut down this mount point.
794                  */
795                 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
796                         rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
797                         RPC_RETURN(EINTR);
798                 }
799                 so = rep->r_rpcclnt->rc_so;
800                 if (!so) {
801                         error = rpcclnt_reconnect(rep, td);
802                         if (error) {
803                                 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
804                                 RPC_RETURN(error);
805                         }
806                         goto tryagain;
807                 }
808                 while (rep->r_flags & R_MUSTRESEND) {
809                         m = m_copym(rep->r_mreq, 0, M_COPYALL, M_TRYWAIT);
810                         rpcstats.rpcretries++;
811                         error = rpcclnt_send(so, rep->r_rpcclnt->rc_name, m, rep);
812                         if (error) {
813                                 if (error == EINTR || error == ERESTART ||
814                                     (error = rpcclnt_reconnect(rep, td)) != 0) {
815                                         rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
816                                         RPC_RETURN(error);
817                                 }
818                                 goto tryagain;
819                         }
820                 }
821                 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
822                 if (sotype == SOCK_STREAM) {
823                         aio.iov_base = (caddr_t) & len;
824                         aio.iov_len = sizeof(u_int32_t);
825                         auio.uio_iov = &aio;
826                         auio.uio_iovcnt = 1;
827                         auio.uio_segflg = UIO_SYSSPACE;
828                         auio.uio_rw = UIO_READ;
829                         auio.uio_offset = 0;
830                         auio.uio_resid = sizeof(u_int32_t);
831 #ifdef __OpenBSD__
832                         auio.uio_procp = td;
833 #else
834                         auio.uio_td = td;
835 #endif
836                         do {
837                                 rcvflg = MSG_WAITALL;
838                                 error = soreceive(so, NULL, &auio, NULL, NULL, &rcvflg);
839                                 if (error == EWOULDBLOCK && rep) {
840                                         if (rep->r_flags & R_SOFTTERM)
841                                                 RPC_RETURN(EINTR);
842                                 }
843                         } while (error == EWOULDBLOCK);
844                         if (!error && auio.uio_resid > 0) {
845                                 log(LOG_INFO,
846                                 "short receive (%zu/%zu) from rpc server %s\n",
847                                     sizeof(u_int32_t) - auio.uio_resid,
848                                     sizeof(u_int32_t),
849                                     rep->r_rpcclnt->rc_prog->prog_name);
850                                 error = EPIPE;
851                         }
852                         if (error)
853                                 goto errout;
854                         len = ntohl(len) & ~0x80000000;
855                         /*
856                          * This is SERIOUS! We are out of sync with the
857                          * sender and forcing a disconnect/reconnect is all I
858                          * can do.
859                          */
860                         if (len > RPC_MAXPACKET) {
861                                 log(LOG_ERR, "%s (%d) from rpc server %s\n",
862                                     "impossible packet length",
863                                     len,
864                                     rep->r_rpcclnt->rc_prog->prog_name);
865                                 error = EFBIG;
866                                 goto errout;
867                         }
868                         auio.uio_resid = len;
869                         do {
870                                 rcvflg = MSG_WAITALL;
871                                 error = soreceive(so, NULL, &auio, mp, NULL, &rcvflg);
872                         } while (error == EWOULDBLOCK || error == EINTR ||
873                                  error == ERESTART);
874                         if (!error && auio.uio_resid > 0) {
875                                 log(LOG_INFO,
876                                 "short receive (%d/%d) from rpc server %s\n",
877                                     len - auio.uio_resid, len,
878                                     rep->r_rpcclnt->rc_prog->prog_name);
879                                 error = EPIPE;
880                         }
881                 } else {
882                         /*
883                          * NB: Since uio_resid is big, MSG_WAITALL is ignored
884                          * and soreceive() will return when it has either a
885                          * control msg or a data msg. We have no use for
886                          * control msg., but must grab them and then throw
887                          * them away so we know what is going on.
888                          */
889                         auio.uio_resid = len = 100000000;       /* Anything Big */
890 #ifdef __OpenBSD__
891                         auio.uio_procp = td;
892 #else
893                         auio.uio_td = td;
894 #endif
895                         do {
896                                 rcvflg = 0;
897                                 error = soreceive(so, NULL, &auio, mp, &control, &rcvflg);
898                                 if (control)
899                                         m_freem(control);
900                                 if (error == EWOULDBLOCK && rep) {
901                                         if (rep->r_flags & R_SOFTTERM)
902                                                 RPC_RETURN(EINTR);
903                                 }
904                         } while (error == EWOULDBLOCK ||
905                                  (!error && *mp == NULL && control));
906                         if ((rcvflg & MSG_EOR) == 0)
907                                 printf("Egad!!\n");
908                         if (!error && *mp == NULL)
909                                 error = EPIPE;
910                         len -= auio.uio_resid;
911                 }
912 errout:
913                 if (error && error != EINTR && error != ERESTART) {
914                         m_freem(*mp);
915                         *mp = (struct mbuf *) 0;
916                         if (error != EPIPE)
917                                 log(LOG_INFO,
918                                     "receive error %d from rpc server %s\n",
919                                     error,
920                                     rep->r_rpcclnt->rc_prog->prog_name);
921                         error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
922                         if (!error)
923                                 error = rpcclnt_reconnect(rep, td);
924                         if (!error)
925                                 goto tryagain;
926                 }
927         } else {
928                 if ((so = rep->r_rpcclnt->rc_so) == NULL)
929                         RPC_RETURN(EACCES);
930                 if (so->so_state & SS_ISCONNECTED)
931                         getnam = NULL;
932                 else
933                         getnam = aname;
934                 auio.uio_resid = len = 1000000;
935 #ifdef __OpenBSD__
936                 auio.uio_procp = td;
937 #else
938                 auio.uio_td = td;
939 #endif
940
941                 do {
942                         rcvflg = 0;
943                         error = soreceive(so, getnam, &auio, mp, NULL, &rcvflg);
944                         RPCDEBUG("soreceivce returns %d", error);
945                         if (error == EWOULDBLOCK && (rep->r_flags & R_SOFTTERM)) {
946                                 RPCDEBUG("wouldblock && softerm -> EINTR");
947                                 RPC_RETURN(EINTR);
948                         }
949                 } while (error == EWOULDBLOCK);
950                 len -= auio.uio_resid;
951         }
952         if (error) {
953                 m_freem(*mp);
954                 *mp = NULL;
955         } else {
956                 /*
957                  * Search for any mbufs that are not a multiple of 4 bytes
958                  * long or with m_data not longword aligned. These could
959                  * cause pointer alignment problems, so copy them to well
960                  * aligned mbufs.
961                  */
962                 rpcclnt_realign(mp, 5 * RPCX_UNSIGNED);
963         }
964         RPC_RETURN(error);
965 }
966
967
968 /*
969  * Implement receipt of reply on a socket. We must search through the list of
970  * received datagrams matching them with outstanding requests using the xid,
971  * until ours is found.
972  */
973 /* ARGSUSED */
974 static int
975 rpcclnt_reply(myrep, td)
976         struct rpctask *myrep;
977         RPC_EXEC_CTX td;
978 {
979         struct rpctask *rep;
980         struct rpcclnt *rpc = myrep->r_rpcclnt;
981         int32_t         t1;
982         struct mbuf    *mrep, *md;
983 #ifdef __OpenBSD__
984         struct mbuf    *nam;
985 #else
986         struct sockaddr *nam;
987 #endif
988         u_int32_t       rxid, *tl;
989         caddr_t         dpos, cp2;
990         int             error;
991
992         /*
993          * Loop around until we get our own reply
994          */
995         for (;;) {
996                 /*
997                  * Lock against other receivers so that I don't get stuck in
998                  * sbwait() after someone else has received my reply for me.
999                  * Also necessary for connection based protocols to avoid
1000                  * race conditions during a reconnect.
1001                  */
1002                 error = rpcclnt_rcvlock(myrep);
1003                 if (error)
1004                         RPC_RETURN(error);
1005                 /* Already received, bye bye */
1006                 if (myrep->r_mrep != NULL) {
1007                         rpcclnt_rcvunlock(&rpc->rc_flag);
1008                         RPC_RETURN(0);
1009                 }
1010                 /*
1011                  * Get the next Rpc reply off the socket
1012                  */
1013                 error = rpcclnt_receive(myrep, &nam, &mrep, td);
1014
1015                 rpcclnt_rcvunlock(&rpc->rc_flag);
1016
1017                 if (error) {
1018                         /*
1019                          * Ignore routing errors on connectionless
1020                          * protocols??
1021                          */
1022                         if (RPCIGNORE_SOERROR(rpc->rc_soflags, error)) {
1023                                 rpc->rc_so->so_error = 0;
1024                                 if (myrep->r_flags & R_GETONEREP)
1025                                         RPC_RETURN(0);
1026                                 RPCDEBUG("ingoring routing error on connectionless protocol.");
1027                                 continue;
1028                         }
1029                         RPC_RETURN(error);
1030                 }
1031 #ifdef __OpenBSD__
1032                 if (nam)
1033                         m_freem(nam);
1034 #else
1035                 if (nam)
1036                         FREE(nam, M_SONAME);
1037 #endif
1038
1039                 /*
1040                  * Get the xid and check that it is an rpc reply
1041                  */
1042                 md = mrep;
1043                 dpos = mtod(md, caddr_t);
1044                 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1045                 rxid = *tl++;
1046                 if (*tl != rpc_reply) {
1047                         rpcstats.rpcinvalid++;
1048                         m_freem(mrep);
1049 rpcmout:
1050                         if (myrep->r_flags & R_GETONEREP)
1051                                 RPC_RETURN(0);
1052                         continue;
1053                 }
1054                 /*
1055                  * Loop through the request list to match up the reply Iff no
1056                  * match, just drop the datagram
1057                  */
1058                 TAILQ_FOREACH(rep, &rpctask_q, r_chain) {
1059                         if (rep->r_mrep == NULL && rxid == rep->r_xid) {
1060                                 /* Found it.. */
1061                                 rep->r_mrep = mrep;
1062                                 rep->r_md = md;
1063                                 rep->r_dpos = dpos;
1064
1065                                 /*
1066                                  * Update congestion window. Do the additive
1067                                  * increase of one rpc/rtt.
1068                                  */
1069                                 if (rpc->rc_cwnd <= rpc->rc_sent) {
1070                                         rpc->rc_cwnd +=
1071                                                 (RPC_CWNDSCALE * RPC_CWNDSCALE +
1072                                         (rpc->rc_cwnd >> 1)) / rpc->rc_cwnd;
1073                                         if (rpc->rc_cwnd > RPC_MAXCWND)
1074                                                 rpc->rc_cwnd = RPC_MAXCWND;
1075                                 }
1076                                 rep->r_flags &= ~R_SENT;
1077                                 rpc->rc_sent -= RPC_CWNDSCALE;
1078                                 /*
1079                                  * Update rtt using a gain of 0.125 on the
1080                                  * mean and a gain of 0.25 on the deviation.
1081                                  */
1082                                 if (rep->r_flags & R_TIMING) {
1083                                         /*
1084                                          * Since the timer resolution of
1085                                          * NFS_HZ is so course, it can often
1086                                          * result in r_rtt == 0. Since r_rtt
1087                                          * == N means that the actual rtt is
1088                                          * between N+dt and N+2-dt ticks, add
1089                                          * 1.
1090                                          */
1091                                         t1 = rep->r_rtt + 1;
1092                                         t1 -= (RPC_SRTT(rpc, rep) >> 3);
1093                                         RPC_SRTT(rpc, rep) += t1;
1094                                         if (t1 < 0)
1095                                                 t1 = -t1;
1096                                         t1 -= (RPC_SDRTT(rpc, rep) >> 2);
1097                                         RPC_SDRTT(rpc, rep) += t1;
1098                                 }
1099                                 rpc->rc_timeouts = 0;
1100                                 break;
1101                         }
1102                 }
1103                 /*
1104                  * If not matched to a request, drop it. If it's mine, get
1105                  * out.
1106                  */
1107                 if (rep == 0) {
1108                         rpcstats.rpcunexpected++;
1109                         RPCDEBUG("rpc reply not matched\n");
1110                         m_freem(mrep);
1111                 } else if (rep == myrep) {
1112                         if (rep->r_mrep == NULL)
1113                                 panic("rpcreply nil");
1114                         RPC_RETURN(0);
1115                 }
1116                 if (myrep->r_flags & R_GETONEREP)
1117                         RPC_RETURN(0);
1118         }
1119 }
1120
1121 /* XXX: ignores tryagain! */
1122 /*
1123  * code from nfs_request - goes something like this
1124  *      - fill in task struct
1125  *      - links task into list
1126  *      - calls rpcclnt_send() for first transmit
1127  *      - calls rpcclnt_reply() to get reply
1128  *      - fills in reply (which should be initialized prior to
1129  *        calling), which is valid when 0 is returned and is
1130  *        NEVER freed in this function
1131  * 
1132  * nb: always frees the request header, but NEVER frees 'mrest'
1133  * 
1134  * rpcclnt_setauth() should be used before calling this. EAUTH is returned if
1135  * authentication fails.
1136  *
1137  * note that reply->result_* are invalid unless reply->type ==
1138  * RPC_MSGACCEPTED and reply->status == RPC_SUCCESS and that reply->verf_*
1139  * are invalid unless reply->type == RPC_MSGACCEPTED
1140  */
1141 int
1142 rpcclnt_request(rpc, mrest, procnum, td, cred, reply)
1143         struct rpcclnt *rpc;
1144         struct mbuf    *mrest;
1145         int             procnum;
1146         RPC_EXEC_CTX    td;
1147         struct ucred   *cred;
1148         struct rpc_reply *reply;
1149 {
1150         struct mbuf    *m, *mrep;
1151         struct rpctask *task;
1152         u_int32_t      *tl;
1153         struct mbuf    *md, *mheadend;
1154         caddr_t         dpos, cp2;
1155         int             t1, s, error = 0, mrest_len;
1156         u_int32_t       xid;
1157
1158 #ifdef __OpenBSD__
1159         task = pool_get(&rpctask_pool, PR_WAITOK);
1160 #else
1161         MALLOC(task, struct rpctask *, sizeof(struct rpctask), M_RPC, (M_WAITOK | M_ZERO));
1162 #endif
1163
1164         task->r_rpcclnt = rpc;
1165         task->r_procnum = procnum;
1166         task->r_td = td;
1167
1168         mrest_len = m_length(mrest, NULL);
1169
1170         m = rpcclnt_buildheader(rpc, procnum, mrest, mrest_len, &xid, &mheadend,
1171             cred);
1172
1173         /*
1174          * For stream protocols, insert a Sun RPC Record Mark.
1175          */
1176         if (rpc->rc_sotype == SOCK_STREAM) {
1177                 M_PREPEND(m, RPCX_UNSIGNED, M_TRYWAIT);
1178                 *mtod(m, u_int32_t *) = htonl(0x80000000 |
1179                                          (m->m_pkthdr.len - RPCX_UNSIGNED));
1180         }
1181         task->r_mreq = m;
1182         task->r_xid = xid;
1183
1184         if (rpc->rc_flag & RPCCLNT_SOFT)
1185                 task->r_retry = rpc->rc_retry;
1186         else
1187                 task->r_retry = RPC_MAXREXMIT + 1;      /* past clip limit */
1188         task->r_rtt = task->r_rexmit = 0;
1189
1190         if (rpcclnt_proct(rpc, procnum) > 0)
1191                 task->r_flags = R_TIMING;
1192         else
1193                 task->r_flags = 0;
1194         task->r_mrep = NULL;
1195
1196         /*
1197          * Do the client side RPC.
1198          */
1199         rpcstats.rpcrequests++;
1200
1201         /*
1202          * Chain request into list of outstanding requests. Be sure to put it
1203          * LAST so timer finds oldest requests first.
1204          */
1205         s = splsoftclock();
1206         if (TAILQ_EMPTY(&rpctask_q))
1207                 callout_reset(&rpcclnt_callout, rpcclnt_ticks, rpcclnt_timer,
1208                     NULL);
1209         TAILQ_INSERT_TAIL(&rpctask_q, task, r_chain);
1210
1211         /*
1212          * If backing off another request or avoiding congestion, don't send
1213          * this one now but let timer do it. If not timing a request, do it
1214          * now.
1215          */
1216         if (rpc->rc_so && (rpc->rc_sotype != SOCK_DGRAM ||
1217                            (rpc->rc_flag & RPCCLNT_DUMBTIMR) ||
1218                            rpc->rc_sent < rpc->rc_cwnd)) {
1219                 splx(s);
1220
1221                 if (rpc->rc_soflags & PR_CONNREQUIRED)
1222                         error = rpcclnt_sndlock(&rpc->rc_flag, task);
1223                 if (!error) {
1224                         error = rpcclnt_send(rpc->rc_so, rpc->rc_name,
1225                                              m_copym(m, 0, M_COPYALL, M_TRYWAIT),
1226                                              task);
1227                         if (rpc->rc_soflags & PR_CONNREQUIRED)
1228                                 rpcclnt_sndunlock(&rpc->rc_flag);
1229                 }
1230                 if (!error && (task->r_flags & R_MUSTRESEND) == 0) {
1231                         rpc->rc_sent += RPC_CWNDSCALE;
1232                         task->r_flags |= R_SENT;
1233                 }
1234         } else {
1235                 splx(s);
1236                 task->r_rtt = -1;
1237         }
1238
1239         /*
1240          * Wait for the reply from our send or the timer's.
1241          */
1242         if (!error || error == EPIPE)
1243                 error = rpcclnt_reply(task, td);
1244
1245         /*
1246          * RPC done, unlink the request.
1247          */
1248         s = splsoftclock();
1249         TAILQ_REMOVE(&rpctask_q, task, r_chain);
1250         if (TAILQ_EMPTY(&rpctask_q))
1251                 callout_stop(&rpcclnt_callout);
1252         splx(s);
1253
1254         /*
1255          * Decrement the outstanding request count.
1256          */
1257         if (task->r_flags & R_SENT) {
1258                 task->r_flags &= ~R_SENT;       /* paranoia */
1259                 rpc->rc_sent -= RPC_CWNDSCALE;
1260         }
1261         /*
1262          * If there was a successful reply and a tprintf msg. tprintf a
1263          * response.
1264          */
1265         if (!error && (task->r_flags & R_TPRINTFMSG)) {
1266                 mtx_lock(&Giant);
1267                 rpcclnt_msg(task->r_td, rpc->rc_prog->prog_name,
1268                             "is alive again");
1269                 mtx_unlock(&Giant);
1270         }
1271
1272         /* free request header (leaving mrest) */
1273         mheadend->m_next = NULL;
1274         m_freem(task->r_mreq);
1275
1276         /* initialize reply */
1277         reply->mrep = task->r_mrep;
1278         reply->verf_md = NULL;
1279         reply->result_md = NULL;
1280
1281         mrep = task->r_mrep;
1282         md = task->r_md;
1283         dpos = task->r_dpos;
1284
1285         /* task structure is no longer needed */
1286 #ifdef __OpenBSD__
1287         pool_put(&rpctask_pool, task);
1288 #else
1289         FREE(task, M_RPC);
1290 #endif
1291
1292         if (error)
1293                 goto rpcmout;
1294
1295         /*
1296          * break down the rpc header and check if ok
1297          */
1298
1299         rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1300         reply->stat.type = fxdr_unsigned(u_int32_t, *tl);
1301
1302         if (reply->stat.type == RPC_MSGDENIED) {
1303                 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1304                 reply->stat.status = fxdr_unsigned(u_int32_t, *tl);
1305
1306                 switch (reply->stat.status) {
1307                 case RPC_MISMATCH:
1308                         rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1309                         reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++);
1310                         reply->stat.mismatch_info.high = fxdr_unsigned(u_int32_t, *tl);
1311                         error = EOPNOTSUPP;
1312                         break;
1313                 case RPC_AUTHERR:
1314                         rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1315                         reply->stat.autherr = fxdr_unsigned(u_int32_t, *tl);
1316                         error = EACCES;
1317                         break;
1318                 default:
1319                         error = EBADRPC;
1320                         break;
1321                 }
1322                 goto rpcmout;
1323         } else if (reply->stat.type != RPC_MSGACCEPTED) {
1324                 error = EBADRPC;
1325                 goto rpcmout;
1326         }
1327
1328         rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1329
1330         reply->verf_md = md;
1331         reply->verf_dpos = dpos;
1332
1333         reply->verf_type = fxdr_unsigned(u_int32_t, *tl++);
1334         reply->verf_size = fxdr_unsigned(u_int32_t, *tl);
1335
1336         if (reply->verf_size != 0)
1337                 rpcm_adv(rpcm_rndup(reply->verf_size));
1338
1339         rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1340         reply->stat.status = fxdr_unsigned(u_int32_t, *tl);
1341
1342         if (reply->stat.status == RPC_SUCCESS) {
1343                 if ((uint32_t)(dpos - mtod(md, caddr_t)) >= md->m_len) {
1344                         RPCDEBUG("where is the next mbuf?");
1345                         RPCDEBUG("%d -> %d",
1346                             (int)(dpos - mtod(md, caddr_t)), md->m_len);
1347                         if (md->m_next == NULL) {
1348                                 error = EBADRPC;
1349                                 goto rpcmout;
1350                         } else {
1351                                 reply->result_md = md->m_next;
1352                                 reply->result_dpos = mtod(reply->result_md,
1353                                     caddr_t);
1354                         }
1355                 } else {
1356                         reply->result_md = md;
1357                         reply->result_dpos = dpos;
1358                 }
1359         } else if (reply->stat.status == RPC_PROGMISMATCH) {
1360                 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1361                 reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++);
1362                 reply->stat.mismatch_info.high = fxdr_unsigned(u_int32_t, *tl);
1363                 error = EOPNOTSUPP;
1364                 goto rpcmout;
1365         } else {
1366                 error = EPROTONOSUPPORT;
1367                 goto rpcmout;
1368         }
1369         error = 0;
1370
1371 rpcmout:
1372         RPC_RETURN(error);
1373 }
1374
1375
1376 /*
1377  * RPC timer routine
1378  * Scan the rpctask list and retranmit any requests that have timed out.
1379  * To avoid retransmission attempts on STREAM sockets (in the future) make
1380  * sure to set the r_retry field to 0 (implies nm_retry == 0).
1381  */
1382 void
1383 rpcclnt_timer(arg)
1384         void           *arg;
1385 {
1386 #ifdef __OpenBSD__
1387         struct timeout *to = (struct timeout *) arg;
1388 #endif
1389         struct rpctask *rep;
1390         struct mbuf    *m;
1391         struct socket  *so;
1392         struct rpcclnt *rpc;
1393         int             timeo;
1394         int             s, error;
1395
1396 #ifndef __OpenBSD__
1397         struct thread  *td = curthread;
1398 #endif
1399
1400 #ifdef __OpenBSD__
1401         s = splsoftnet();
1402 #else
1403         s = splnet();
1404 #endif
1405         mtx_lock(&Giant);       /* rpc_msg -> tprintf */
1406         TAILQ_FOREACH(rep, &rpctask_q, r_chain) {
1407                 rpc = rep->r_rpcclnt;
1408                 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
1409                         continue;
1410                 if (rpcclnt_sigintr(rpc, rep, rep->r_td)) {
1411                         rep->r_flags |= R_SOFTTERM;
1412                         continue;
1413                 }
1414                 if (rep->r_rtt >= 0) {
1415                         rep->r_rtt++;
1416                         if (rpc->rc_flag & RPCCLNT_DUMBTIMR)
1417                                 timeo = rpc->rc_timeo;
1418                         else
1419                                 timeo = RPC_RTO(rpc, rpcclnt_proct(rep->r_rpcclnt,
1420                                                            rep->r_procnum));
1421                         if (rpc->rc_timeouts > 0)
1422                                 timeo *= rpcclnt_backoff[rpc->rc_timeouts - 1];
1423                         if (rep->r_rtt <= timeo)
1424                                 continue;
1425                         if (rpc->rc_timeouts < 8)
1426                                 rpc->rc_timeouts++;
1427                 }
1428                 /*
1429                  * Check for server not responding
1430                  */
1431                 if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
1432                     rep->r_rexmit > rpc->rc_deadthresh) {
1433                         rpcclnt_msg(rep->r_td, rpc->rc_prog->prog_name,
1434                                     "not responding");
1435                         rep->r_flags |= R_TPRINTFMSG;
1436                 }
1437                 if (rep->r_rexmit >= rep->r_retry) {    /* too many */
1438                         rpcstats.rpctimeouts++;
1439                         rep->r_flags |= R_SOFTTERM;
1440                         continue;
1441                 }
1442                 if (rpc->rc_sotype != SOCK_DGRAM) {
1443                         if (++rep->r_rexmit > RPC_MAXREXMIT)
1444                                 rep->r_rexmit = RPC_MAXREXMIT;
1445                         continue;
1446                 }
1447                 if ((so = rpc->rc_so) == NULL)
1448                         continue;
1449
1450                 /*
1451                  * If there is enough space and the window allows.. Resend it
1452                  * Set r_rtt to -1 in case we fail to send it now.
1453                  */
1454                 rep->r_rtt = -1;
1455                 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
1456                     ((rpc->rc_flag & RPCCLNT_DUMBTIMR) ||
1457                      (rep->r_flags & R_SENT) ||
1458                      rpc->rc_sent < rpc->rc_cwnd) &&
1459                     (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) {
1460                         if ((rpc->rc_flag & RPCCLNT_NOCONN) == 0)
1461                                 error = (*so->so_proto->pr_usrreqs->pru_send) (so, 0, m,
1462                                                             NULL, NULL, td);
1463                         else
1464                                 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, rpc->rc_name, NULL, td);
1465                         if (error) {
1466                                 if (RPCIGNORE_SOERROR(rpc->rc_soflags, error))
1467                                         so->so_error = 0;
1468                         } else {
1469                                 /*
1470                                  * Iff first send, start timing else turn
1471                                  * timing off, backoff timer and divide
1472                                  * congestion window by 2.
1473                                  */
1474                                 if (rep->r_flags & R_SENT) {
1475                                         rep->r_flags &= ~R_TIMING;
1476                                         if (++rep->r_rexmit > RPC_MAXREXMIT)
1477                                                 rep->r_rexmit = RPC_MAXREXMIT;
1478                                         rpc->rc_cwnd >>= 1;
1479                                         if (rpc->rc_cwnd < RPC_CWNDSCALE)
1480                                                 rpc->rc_cwnd = RPC_CWNDSCALE;
1481                                         rpcstats.rpcretries++;
1482                                 } else {
1483                                         rep->r_flags |= R_SENT;
1484                                         rpc->rc_sent += RPC_CWNDSCALE;
1485                                 }
1486                                 rep->r_rtt = 0;
1487                         }
1488                 }
1489         }
1490         mtx_unlock(&Giant);     /* rpc_msg -> tprintf */
1491         splx(s);
1492
1493 #ifdef __OpenBSD__
1494         timeout_add(rpcclnt_timer, to, rpcclnt_ticks);
1495 #else
1496         callout_reset(&rpcclnt_callout, rpcclnt_ticks, rpcclnt_timer, NULL);
1497 #endif
1498 }
1499
1500 /*
1501  * Test for a termination condition pending on the process. This is used for
1502  * RPCCLNT_INT mounts.
1503  */
1504 int
1505 rpcclnt_sigintr(rpc, task, pr)
1506         struct rpcclnt *rpc;
1507         struct rpctask *task;
1508         RPC_EXEC_CTX pr;
1509 {
1510         struct proc    *p;
1511
1512         sigset_t        tmpset;
1513
1514         if (rpc == NULL) 
1515                 return EFAULT;
1516
1517         /* XXX deal with forced unmounts */
1518
1519         if (task && (task->r_flags & R_SOFTTERM))
1520                 RPC_RETURN(EINTR);
1521
1522         if (!(rpc->rc_flag & RPCCLNT_INT))
1523                 RPC_RETURN(0);
1524
1525         if (pr == NULL)
1526                 return (0);
1527
1528 #ifdef __OpenBSD__
1529         p = pr;
1530         if (p && p->p_siglist &&
1531             (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
1532              RPCINT_SIGMASK))
1533                 RPC_RETURN(EINTR);
1534 #else
1535         p = pr->td_proc;
1536         PROC_LOCK(p);
1537         tmpset = p->p_siglist;
1538         SIGSETNAND(tmpset, pr->td_sigmask);
1539         mtx_lock(&p->p_sigacts->ps_mtx);
1540         SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
1541         mtx_unlock(&p->p_sigacts->ps_mtx);
1542         if (SIGNOTEMPTY(p->p_siglist) && RPCCLNTINT_SIGMASK(tmpset)) {
1543                 PROC_UNLOCK(p);
1544                 RPC_RETURN(EINTR);
1545         }
1546         PROC_UNLOCK(p);
1547 #endif
1548         RPC_RETURN(0);
1549 }
1550
1551 /*
1552  * Lock a socket against others. Necessary for STREAM sockets to ensure you
1553  * get an entire rpc request/reply and also to avoid race conditions between
1554  * the processes with nfs requests in progress when a reconnect is necessary.
1555  */
1556 static int
1557 rpcclnt_sndlock(flagp, task)
1558         int            *flagp;
1559         struct rpctask *task;
1560 {
1561         RPC_EXEC_CTX p;
1562         int             slpflag = 0, slptimeo = 0;
1563
1564         p = task->r_td;
1565         if (task->r_rpcclnt->rc_flag & RPCCLNT_INT)
1566                 slpflag = PCATCH;
1567         while (*flagp & RPCCLNT_SNDLOCK) {
1568                 if (rpcclnt_sigintr(task->r_rpcclnt, task, p))
1569                         RPC_RETURN(EINTR);
1570                 *flagp |= RPCCLNT_WANTSND;
1571                 (void)tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcsndlck",
1572                              slptimeo);
1573                 if (slpflag == PCATCH) {
1574                         slpflag = 0;
1575                         slptimeo = 2 * hz;
1576                 }
1577         }
1578         *flagp |= RPCCLNT_SNDLOCK;
1579         RPC_RETURN(0);
1580 }
1581
1582 /*
1583  * Unlock the stream socket for others.
1584  */
1585 static void
1586 rpcclnt_sndunlock(flagp)
1587         int            *flagp;
1588 {
1589
1590         if ((*flagp & RPCCLNT_SNDLOCK) == 0)
1591                 panic("rpc sndunlock");
1592         *flagp &= ~RPCCLNT_SNDLOCK;
1593         if (*flagp & RPCCLNT_WANTSND) {
1594                 *flagp &= ~RPCCLNT_WANTSND;
1595                 wakeup((caddr_t) flagp);
1596         }
1597 }
1598
1599 static int
1600 rpcclnt_rcvlock(task)
1601         struct rpctask *task;
1602 {
1603         int            *flagp = &task->r_rpcclnt->rc_flag;
1604         int             slpflag, slptimeo = 0;
1605
1606         if (*flagp & RPCCLNT_INT)
1607                 slpflag = PCATCH;
1608         else
1609                 slpflag = 0;
1610         while (*flagp & RPCCLNT_RCVLOCK) {
1611                 if (rpcclnt_sigintr(task->r_rpcclnt, task, task->r_td))
1612                         RPC_RETURN(EINTR);
1613                 *flagp |= RPCCLNT_WANTRCV;
1614                 (void)tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcrcvlk",
1615                              slptimeo);
1616                 if (slpflag == PCATCH) {
1617                         slpflag = 0;
1618                         slptimeo = 2 * hz;
1619                 }
1620         }
1621         *flagp |= RPCCLNT_RCVLOCK;
1622         RPC_RETURN(0);
1623 }
1624
1625 /*
1626  * Unlock the stream socket for others.
1627  */
1628 static void
1629 rpcclnt_rcvunlock(flagp)
1630         int            *flagp;
1631 {
1632
1633         if ((*flagp & RPCCLNT_RCVLOCK) == 0)
1634                 panic("nfs rcvunlock");
1635         *flagp &= ~RPCCLNT_RCVLOCK;
1636         if (*flagp & RPCCLNT_WANTRCV) {
1637                 *flagp &= ~RPCCLNT_WANTRCV;
1638                 wakeup((caddr_t) flagp);
1639         }
1640 }
1641
1642 #if 0
1643 /*
1644  * Check for badly aligned mbuf data areas and realign data in an mbuf list
1645  * by copying the data areas up, as required.
1646  */
1647 void
1648 rpcclnt_realign(m, hsiz)
1649         struct mbuf    *m;
1650         int             hsiz;
1651 {
1652         struct mbuf    *m2;
1653         int             siz, mlen, olen;
1654         caddr_t         tcp, fcp;
1655         struct mbuf    *mnew;
1656
1657         while (m) {
1658                 /*
1659                  * This never happens for UDP, rarely happens for TCP but
1660                  * frequently happens for iso transport.
1661                  */
1662                 if ((m->m_len & 0x3) || (mtod(m, long)&0x3)) {
1663                         olen = m->m_len;
1664                         fcp = mtod(m, caddr_t);
1665                         if ((long)fcp & 0x3) {
1666                                 if (m->m_flags & M_PKTHDR)
1667                                         m_tag_delete_chain(m, NULL);
1668                                 m->m_flags &= ~M_PKTHDR;
1669                                 if (m->m_flags & M_EXT)
1670                                         m->m_data = m->m_ext.ext_buf +
1671                                                 ((m->m_ext.ext_size - olen) & ~0x3);
1672                                 else
1673                                         m->m_data = m->m_dat;
1674                         }
1675                         m->m_len = 0;
1676                         tcp = mtod(m, caddr_t);
1677                         mnew = m;
1678                         m2 = m->m_next;
1679
1680                         /*
1681                          * If possible, only put the first invariant part of
1682                          * the RPC header in the first mbuf.
1683                          */
1684                         mlen = M_TRAILINGSPACE(m);
1685                         if (olen <= hsiz && mlen > hsiz)
1686                                 mlen = hsiz;
1687
1688                         /* Loop through the mbuf list consolidating data. */
1689                         while (m) {
1690                                 while (olen > 0) {
1691                                         if (mlen == 0) {
1692                                                 if (m2->m_flags & M_PKTHDR)
1693                                                         m_tag_delete_chain(m2, NULL);
1694                                                 m2->m_flags &= ~M_PKTHDR;
1695                                                 if (m2->m_flags & M_EXT)
1696                                                         m2->m_data = m2->m_ext.ext_buf;
1697                                                 else
1698                                                         m2->m_data = m2->m_dat;
1699                                                 m2->m_len = 0;
1700                                                 mlen = M_TRAILINGSPACE(m2);
1701                                                 tcp = mtod(m2, caddr_t);
1702                                                 mnew = m2;
1703                                                 m2 = m2->m_next;
1704                                         }
1705                                         siz = min(mlen, olen);
1706                                         if (tcp != fcp)
1707                                                 bcopy(fcp, tcp, siz);
1708                                         mnew->m_len += siz;
1709                                         mlen -= siz;
1710                                         olen -= siz;
1711                                         tcp += siz;
1712                                         fcp += siz;
1713                                 }
1714                                 m = m->m_next;
1715                                 if (m) {
1716                                         olen = m->m_len;
1717                                         fcp = mtod(m, caddr_t);
1718                                 }
1719                         }
1720
1721                         /*
1722                          * Finally, set m_len == 0 for any trailing mbufs
1723                          * that have been copied out of.
1724                          */
1725                         while (m2) {
1726                                 m2->m_len = 0;
1727                                 m2 = m2->m_next;
1728                         }
1729                         return;
1730                 }
1731                 m = m->m_next;
1732         }
1733 }
1734 #else
1735 static void
1736 rpcclnt_realign(struct mbuf **pm, int hsiz)
1737 {
1738         struct mbuf *m;
1739         struct mbuf *n = NULL;
1740         int off = 0;
1741
1742         RPCDEBUG("in rpcclnt_realign()");
1743
1744         while ((m = *pm) != NULL) {
1745             if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
1746                 MGET(n, M_TRYWAIT, MT_DATA);
1747                 if (m->m_len >= MINCLSIZE) {
1748                     MCLGET(n, M_TRYWAIT);
1749                 }
1750                 n->m_len = 0;
1751                 break;
1752             }
1753             pm = &m->m_next;
1754         }
1755
1756         /*
1757         * If n is non-NULL, loop on m copying data, then replace the
1758         * portion of the chain that had to be realigned.
1759         */
1760         if (n != NULL) {
1761             while (m) {
1762                 m_copyback(n, off, m->m_len, mtod(m, caddr_t));
1763                 off += m->m_len;
1764                 m = m->m_next;
1765             }
1766             m_freem(*pm);
1767             *pm = n;
1768         }
1769
1770         RPCDEBUG("leave rpcclnt_realign()");
1771 }
1772 #endif
1773
1774 static int
1775 rpcclnt_msg(p, server, msg)
1776         RPC_EXEC_CTX   p;
1777         const char     *server;
1778         char           *msg;
1779 {
1780 #ifdef __OpenBSD__
1781         tpr_t           tpr;
1782         struct proc    *pr = p;
1783
1784         if (p)
1785                 tpr = tprintf_open(p);
1786         else
1787                 tpr = NULL;
1788         tprintf(tpr, "rpc server %s: %s\n", server, msg);
1789         tprintf_close(tpr);
1790         RPC_RETURN(0);
1791 #else
1792         GIANT_REQUIRED;
1793
1794         tprintf(p ? p->td_proc : NULL, LOG_INFO,
1795                 "nfs server %s: %s\n", server, msg);
1796         RPC_RETURN(0);
1797 #endif
1798 }
1799
1800 /*
1801  * Build the RPC header and fill in the authorization info. The authorization
1802  * string argument is only used when the credentials come from outside of the
1803  * kernel (AUTH_KERB). (likewise, the ucred is only used when inside the
1804  * kernel) Returns the head of the mbuf list.
1805  */
1806 static struct mbuf    *
1807 rpcclnt_buildheader(rc, procid, mrest, mrest_len, xidp, mheadend, cred)
1808         struct rpcclnt *rc;
1809         int             procid;
1810         struct mbuf    *mrest;
1811         u_int32_t       mrest_len;
1812         int            *xidp;
1813         struct mbuf   **mheadend;
1814         struct ucred * cred;
1815 {
1816         /* register */ struct mbuf *mb;
1817         register u_int32_t *tl;
1818         /* register */ caddr_t bpos;
1819         struct mbuf *mreq, *mb2;
1820         int error;
1821
1822         MGETHDR(mb, M_TRYWAIT, MT_DATA);
1823         if (6 * RPCX_UNSIGNED >= MINCLSIZE) {
1824                 MCLGET(mb, M_TRYWAIT);
1825         } else if (6 * RPCX_UNSIGNED < MHLEN) {
1826                 MH_ALIGN(mb, 6 * RPCX_UNSIGNED);
1827         } else {
1828                 RPCDEBUG("mbuf too small");
1829                 panic("cheap bailout");
1830         }
1831         mb->m_len = 0;
1832         mreq = mb;
1833         bpos = mtod(mb, caddr_t);
1834
1835         /*
1836          * First the RPC header.
1837          */
1838         rpcm_build(tl, u_int32_t *, 6 * RPCX_UNSIGNED);
1839
1840         /* Get a new (non-zero) xid */
1841         if ((rpcclnt_xid == 0) && (rpcclnt_xid_touched == 0)) {
1842                 rpcclnt_xid = arc4random();
1843                 rpcclnt_xid_touched = 1;
1844         } else {
1845                 while ((*xidp = arc4random() % 256) == 0);
1846                 rpcclnt_xid += *xidp;
1847         }
1848
1849         /* XXX: funky... */
1850         *tl++ = *xidp = txdr_unsigned(rpcclnt_xid);
1851
1852         *tl++ = rpc_call;
1853         *tl++ = rpc_vers;
1854         *tl++ = txdr_unsigned(rc->rc_prog->prog_id);
1855         *tl++ = txdr_unsigned(rc->rc_prog->prog_version);
1856         *tl++ = txdr_unsigned(procid);
1857
1858         if ((error = rpcauth_buildheader(rc->rc_auth, cred, &mb, &bpos))) {
1859                 RPCDEBUG("rpcauth_buildheader failed %d", error);
1860                 return NULL;
1861         }
1862
1863         mb->m_next = mrest;
1864         *mheadend = mb;
1865         mreq->m_pkthdr.len = m_length(mreq, NULL);
1866         mreq->m_pkthdr.rcvif = NULL;
1867         return (mreq);
1868 }
1869
1870 /*
1871  * Help break down an mbuf chain by setting the first siz bytes contiguous
1872  * pointed to by returned val. This is used by the macros rpcm_dissect and
1873  * rpcm_dissecton for tough cases. (The macros use the vars. dpos and dpos2)
1874  */
1875 static int
1876 rpcm_disct(mdp, dposp, siz, left, cp2)
1877         struct mbuf   **mdp;
1878         caddr_t        *dposp;
1879         int             siz;
1880         int             left;
1881         caddr_t        *cp2;
1882 {
1883         struct mbuf    *mp, *mp2;
1884         int             siz2, xfer;
1885         caddr_t         p;
1886
1887         mp = *mdp;
1888         while (left == 0) {
1889                 *mdp = mp = mp->m_next;
1890                 if (mp == NULL)
1891                         RPC_RETURN(EBADRPC);
1892                 left = mp->m_len;
1893                 *dposp = mtod(mp, caddr_t);
1894         }
1895         if (left >= siz) {
1896                 *cp2 = *dposp;
1897                 *dposp += siz;
1898         } else if (mp->m_next == NULL) {
1899                 RPC_RETURN(EBADRPC);
1900         } else if (siz > MHLEN) {
1901                 panic("rpc S too big");
1902         } else {
1903                 MGET(mp2, M_TRYWAIT, MT_DATA);
1904                 mp2->m_next = mp->m_next;
1905                 mp->m_next = mp2;
1906                 mp->m_len -= left;
1907                 mp = mp2;
1908                 *cp2 = p = mtod(mp, caddr_t);
1909                 bcopy(*dposp, p, left); /* Copy what was left */
1910                 siz2 = siz - left;
1911                 p += left;
1912                 mp2 = mp->m_next;
1913                 /* Loop around copying up the siz2 bytes */
1914                 while (siz2 > 0) {
1915                         if (mp2 == NULL)
1916                                 RPC_RETURN(EBADRPC);
1917                         xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
1918                         if (xfer > 0) {
1919                                 bcopy(mtod(mp2, caddr_t), p, xfer);
1920                                 RPCMADV(mp2, xfer);
1921                                 mp2->m_len -= xfer;
1922                                 p += xfer;
1923                                 siz2 -= xfer;
1924                         }
1925                         if (siz2 > 0)
1926                                 mp2 = mp2->m_next;
1927                 }
1928                 mp->m_len = siz;
1929                 *mdp = mp2;
1930                 *dposp = mtod(mp2, caddr_t);
1931         }
1932         RPC_RETURN(0);
1933 }
1934
1935
1936
1937 static u_int32_t
1938 rpcclnt_proct(rpc, procid)
1939         struct rpcclnt *rpc;
1940         u_int32_t       procid;
1941 {
1942         if (rpc->rc_proctlen != 0 && rpc->rc_proct != NULL &&
1943             procid < rpc->rc_proctlen) {
1944                 return rpc->rc_proct[procid];
1945         }
1946         return (0);
1947 }
1948
1949 static int
1950 rpc_adv(mdp, dposp, offs, left)
1951         struct mbuf   **mdp;
1952         caddr_t        *dposp;
1953         int             offs;
1954         int             left;
1955 {
1956         struct mbuf    *m;
1957         int             s;
1958
1959         m = *mdp;
1960         s = left;
1961         while (s < offs) {
1962                 offs -= s;
1963                 m = m->m_next;
1964                 if (m == NULL)
1965                         RPC_RETURN(EBADRPC);
1966                 s = m->m_len;
1967         }
1968         *mdp = m;
1969         *dposp = mtod(m, caddr_t) + offs;
1970         RPC_RETURN(0);
1971 }
1972
1973 int
1974 rpcclnt_cancelreqs(rpc)
1975         struct rpcclnt *rpc;
1976 {
1977         struct rpctask *task;
1978         int             i, s;
1979
1980         s = splnet();
1981         TAILQ_FOREACH(task, &rpctask_q, r_chain) {
1982                 if (rpc != task->r_rpcclnt || task->r_mrep != NULL ||
1983                     (task->r_flags & R_SOFTTERM))
1984                         continue;
1985                 rpcclnt_softterm(task);
1986         }
1987         splx(s);
1988
1989         for (i = 0; i < 30; i++) {
1990                 s = splnet();
1991                 TAILQ_FOREACH(task, &rpctask_q, r_chain) {
1992                         if (rpc == task->r_rpcclnt)
1993                                 break;
1994                 }
1995                 splx(s);
1996                 if (task == NULL)
1997                         return (0);
1998                 tsleep(&lbolt, PSOCK, "nfscancel", 0);
1999         }
2000         return (EBUSY);
2001 }
2002
2003 static void
2004 rpcclnt_softterm(struct rpctask * task)
2005 {
2006         task->r_flags |= R_SOFTTERM;
2007         if (task->r_flags & R_SENT) {
2008                 task->r_rpcclnt->rc_sent -= RPC_CWNDSCALE;
2009                 task->r_flags &= ~R_SENT;
2010         }
2011 }
2012
2013
2014 #ifndef __OpenBSD__
2015 /* called by rpcclnt_get() */
2016 void
2017 rpcclnt_create(struct rpcclnt ** rpc)
2018 {
2019         MALLOC(*rpc, struct rpcclnt *, sizeof(struct rpcclnt), M_RPC, M_WAITOK | M_ZERO);
2020 }
2021
2022 /* called by rpcclnt_put() */
2023 void
2024 rpcclnt_destroy(struct rpcclnt * rpc)
2025 {
2026         if (rpc != NULL) {
2027                 FREE(rpc, M_RPC);
2028         } else {
2029                 RPCDEBUG("attempting to free a NULL rpcclnt (not dereferenced)");
2030         }
2031 }
2032 #endif                          /* !__OpenBSD__ */
2033
2034
2035 /* XXX: add a lock around the auth structure in struct rpcclnt and make this
2036  * call safe for calling durring a connection */
2037 static int
2038 rpcauth_buildheader(struct rpc_auth * auth, struct ucred * cred, struct mbuf ** mhdr, caddr_t * bp)
2039 {
2040         size_t authsiz, verfsiz;
2041         uint32_t mlen, grpsiz;
2042         register struct mbuf *mb, *mb2;
2043         caddr_t bpos;
2044         register u_int32_t *tl;
2045         register int i;
2046
2047         if (auth == NULL || mhdr == NULL)
2048           return EFAULT;
2049
2050         switch (auth->auth_type) {
2051         case RPCAUTH_NULL:
2052                 authsiz = 0;
2053                 verfsiz = 0;
2054         break;
2055         case RPCAUTH_UNIX:
2056                 authsiz = (5 + cred->cr_ngroups) * RPCX_UNSIGNED;
2057                 verfsiz = 0;
2058         break;
2059         default:
2060                 return EPROTONOSUPPORT;
2061         break;
2062         };
2063
2064         mlen = rpcm_rndup(authsiz) + rpcm_rndup(verfsiz) + 4 * RPCX_UNSIGNED;
2065
2066         mb = *mhdr;
2067         bpos = *bp;
2068
2069         rpcm_build(tl, u_int32_t *, mlen);
2070
2071         *bp = bpos;
2072         *mhdr = mb;
2073
2074         *tl++ = txdr_unsigned(auth->auth_type);
2075         *tl++ = txdr_unsigned(authsiz);
2076         switch (auth->auth_type) {
2077         case RPCAUTH_UNIX:
2078                 *tl++ = 0;
2079                 *tl++ = 0; 
2080
2081                 *tl++ = txdr_unsigned(cred->cr_uid);
2082                 *tl++ = txdr_unsigned(cred->cr_groups[0]);
2083                 grpsiz = cred->cr_ngroups;
2084                 *tl++ = txdr_unsigned(grpsiz);
2085                 /* XXX: groups[0] is already sent... */
2086                 for (i = 0 ; i < grpsiz ; i++) {
2087                          *tl++ = txdr_unsigned(cred->cr_groups[i]);
2088                 }
2089
2090                 /* null verification header */
2091                 *tl++ = txdr_unsigned(RPCAUTH_NULL);
2092                 *tl++ = 0;
2093         break;
2094         case RPCAUTH_NULL:
2095                 /* just a null verf header */
2096                 *tl++ = txdr_unsigned(RPCAUTH_NULL);
2097                 *tl = 0;
2098         break;
2099         default:
2100                 panic("inconsistent rpc auth type");
2101         break;
2102         }
2103
2104         return 0;
2105 }