2 /* $Id: rpcclnt.c,v 1.9 2003/11/05 14:59:03 rees Exp $ */
6 * the regents of the university of michigan
9 * permission is granted to use, copy, create derivative works and redistribute
10 * this software and such derivative works for any purpose, so long as the name
11 * of the university of michigan is not used in any advertising or publicity
12 * pertaining to the use or distribution of this software without specific,
13 * written prior authorization. if the above copyright notice or any other
14 * identification of the university of michigan is included in any copy of any
15 * portion of this software, then the disclaimer below must also be included.
17 * this software is provided as is, without representation from the university
18 * of michigan as to its fitness for any purpose, and without warranty by the
19 * university of michigan of any kind, either express or implied, including
20 * without limitation the implied warranties of merchantability and fitness for
21 * a particular purpose. the regents of the university of michigan shall not be
22 * liable for any damages, including special, indirect, incidental, or
23 * consequential damages, with respect to any claim arising out of or in
24 * connection with the use of the software, even if it has been or is hereafter
25 * advised of the possibility of such damages.
29 * Copyright (c) 1989, 1991, 1993, 1995 The Regents of the University of
30 * California. All rights reserved.
32 * This code is derived from software contributed to Berkeley by Rick Macklem at
33 * The University of Guelph.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions are
37 * met: 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer. 2.
39 * Redistributions in binary form must reproduce the above copyright notice,
40 * this list of conditions and the following disclaimer in the documentation
41 * and/or other materials provided with the distribution. 3. All advertising
42 * materials mentioning features or use of this software must display the
43 * following acknowledgement: This product includes software developed by the
44 * University of California, Berkeley and its contributors. 4. Neither the
45 * name of the University nor the names of its contributors may be used to
46 * endorse or promote products derived from this software without specific
47 * prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
50 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
51 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
52 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
53 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
55 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
56 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
64 /* XXX: kill ugly debug strings */
65 /* XXX: get rid of proct, as it is not even being used... (or keep it so v{2,3}
66 * can run, but clean it up! */
68 #include <sys/param.h>
69 #include <sys/systm.h>
71 #include <sys/mount.h>
72 #include <sys/kernel.h>
74 #include <sys/syslog.h>
75 #include <sys/malloc.h>
78 #include <sys/signalvar.h>
79 #include <sys/sysent.h>
80 #include <sys/syscall.h>
81 #include <sys/sysctl.h>
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/mutex.h>
89 #include <netinet/in.h>
90 #include <netinet/tcp.h>
92 #include <nfs/rpcv2.h>
94 #include <rpc/rpcm_subs.h>
95 #include <rpc/rpcclnt.h>
97 /* memory management */
99 struct pool rpctask_pool;
100 struct pool rpcclnt_pool;
101 #define RPCTASKPOOL_LWM 10
102 #define RPCTASKPOOL_HWM 40
104 static MALLOC_DEFINE(M_RPC, "rpcclnt", "rpc state");
/*
 * Log the value being returned, then return it.  Each use of the
 * argument is parenthesized (standard macro hygiene) so that an
 * expression with low-precedence operators expands safely.
 */
#define RPC_RETURN(X) do { RPCDEBUG("returning %d", (X)); return (X); } while (0)
110 * Estimate rto for an nfs rpc sent via. an unreliable datagram. Use the mean
111 * and mean deviation of rtt for the appropriate type of rpc for the frequent
112 * rpcs and a default for the others. The justification for doing "other"
113 * this way is that these rpcs happen so infrequently that timer est. would
114 * probably be stale. Also, since many of these rpcs are non-idempotent, a
115 * conservative timeout is desired. getattr, lookup - A+2D read, write -
116 * A+4D other - nm_timeo
118 #define RPC_RTO(n, t) \
119 ((t) == 0 ? (n)->rc_timeo : \
121 (((((n)->rc_srtt[t-1] + 3) >> 2) + (n)->rc_sdrtt[t-1] + 1) >> 1) : \
122 ((((n)->rc_srtt[t-1] + 7) >> 3) + (n)->rc_sdrtt[t-1] + 1)))
124 #define RPC_SRTT(s,r) (r)->r_rpcclnt->rc_srtt[rpcclnt_proct((s),\
127 #define RPC_SDRTT(s,r) (r)->r_rpcclnt->rc_sdrtt[rpcclnt_proct((s),\
132 * There is a congestion window for outstanding rpcs maintained per mount
133 * point. The cwnd size is adjusted in roughly the way that: Van Jacobson,
134 * Congestion avoidance and Control, In "Proceedings of SIGCOMM '88". ACM,
135 * August 1988. describes for TCP. The cwnd size is chopped in half on a
136 * retransmit timeout and incremented by 1/cwnd when each rpc reply is
137 * received and a full cwnd of rpcs is in progress. (The sent count and cwnd
138 * are scaled for integer arith.) Variants of "slow start" were tried and
139 * were found to be too much of a performance hit (ave. rtt 3 times larger),
140 * I suspect due to the large rtt that nfs rpcs have.
142 #define RPC_CWNDSCALE 256
143 #define RPC_MAXCWND (RPC_CWNDSCALE * 32)
/*
 * Retransmit backoff multipliers, indexed by the number of timeouts
 * already seen (rc_timeouts - 1): doubles each step, capped at 256.
 */
static const int rpcclnt_backoff[8] = {
	2, 4, 8, 16, 32, 64, 128, 256
};
146 /* XXX ugly debug strings */
147 #define RPC_ERRSTR_ACCEPTED_SIZE 6
148 char *rpc_errstr_accepted[RPC_ERRSTR_ACCEPTED_SIZE] = {
149 "", /* no good message... */
150 "remote server hasn't exported program.",
151 "remote server can't support version number.",
152 "program can't support procedure.",
153 "procedure can't decode params.",
154 "remote error. remote side memory allocation failure?"
157 char *rpc_errstr_denied[2] = {
158 "remote server doesnt support rpc version 2!",
159 "remote server authentication error."
162 #define RPC_ERRSTR_AUTH_SIZE 6
163 char *rpc_errstr_auth[RPC_ERRSTR_AUTH_SIZE] = {
165 "auth error: bad credential (seal broken).",
166 "auth error: client must begin new session.",
167 "auth error: bad verifier (seal broken).",
168 "auth error: verifier expired or replayed.",
169 "auth error: rejected for security reasons.",
173 * Static data, mostly RPC constants in XDR form
175 static u_int32_t rpc_reply, rpc_call, rpc_vers;
178 * rpc_msgdenied, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted,
179 * rpc_autherr, rpc_auth_kerb;
182 static u_int32_t rpcclnt_xid = 0;
183 static u_int32_t rpcclnt_xid_touched = 0;
184 struct rpcstats rpcstats;
187 SYSCTL_NODE(_kern, OID_AUTO, rpc, CTLFLAG_RD, 0, "RPC Subsystem");
189 SYSCTL_UINT(_kern_rpc, OID_AUTO, retries, CTLFLAG_RD, &rpcstats.rpcretries, 0, "retries");
190 SYSCTL_UINT(_kern_rpc, OID_AUTO, request, CTLFLAG_RD, &rpcstats.rpcrequests, 0, "request");
191 SYSCTL_UINT(_kern_rpc, OID_AUTO, timeouts, CTLFLAG_RD, &rpcstats.rpctimeouts, 0, "timeouts");
192 SYSCTL_UINT(_kern_rpc, OID_AUTO, unexpected, CTLFLAG_RD, &rpcstats.rpcunexpected, 0, "unexpected");
193 SYSCTL_UINT(_kern_rpc, OID_AUTO, invalid, CTLFLAG_RD, &rpcstats.rpcinvalid, 0, "invalid");
198 SYSCTL_UINT(_kern_rpc, OID_AUTO, debug_on, CTLFLAG_RW, &rpcdebugon, 0, "RPC Debug messages");
202 * Queue head for rpctask's
205 TAILQ_HEAD(, rpctask) rpctask_q;
206 struct callout rpcclnt_callout;
209 static int rpcclnt_send(struct socket *, struct mbuf *, struct mbuf *, struct rpctask *);
210 static int rpcclnt_receive(struct rpctask *, struct mbuf **, struct mbuf **, RPC_EXEC_CTX);
212 static int rpcclnt_send(struct socket *, struct sockaddr *, struct mbuf *, struct rpctask *);
213 static int rpcclnt_receive(struct rpctask *, struct sockaddr **, struct mbuf **, RPC_EXEC_CTX);
216 static int rpcclnt_msg(RPC_EXEC_CTX, const char *, char *);
218 static int rpcclnt_reply(struct rpctask *, RPC_EXEC_CTX);
219 static void rpcclnt_timer(void *);
220 static int rpcclnt_sndlock(int *, struct rpctask *);
221 static void rpcclnt_sndunlock(int *);
222 static int rpcclnt_rcvlock(struct rpctask *);
223 static void rpcclnt_rcvunlock(int *);
225 void rpcclnt_realign(struct mbuf *, int);
227 static void rpcclnt_realign(struct mbuf **, int);
230 static struct mbuf *rpcclnt_buildheader(struct rpcclnt *, int, struct mbuf *, u_int32_t, int *, struct mbuf **, struct ucred *);
231 static int rpcm_disct(struct mbuf **, caddr_t *, int, int, caddr_t *);
232 static u_int32_t rpcclnt_proct(struct rpcclnt *, u_int32_t);
233 static int rpc_adv(struct mbuf **, caddr_t *, int, int);
234 static void rpcclnt_softterm(struct rpctask * task);
236 static int rpcauth_buildheader(struct rpc_auth * auth, struct ucred *, struct mbuf **, caddr_t *);
242 static struct timeout rpcclnt_timer_to;
245 rpcclnt_ticks = (hz * RPC_TICKINTVL + 500) / 1000;
246 if (rpcclnt_ticks < 1)
248 rpcstats.rpcretries = 0;
249 rpcstats.rpcrequests = 0;
250 rpcstats.rpctimeouts = 0;
251 rpcstats.rpcunexpected = 0;
252 rpcstats.rpcinvalid = 0;
255 * rpc constants how about actually using more than one of these!
258 rpc_reply = txdr_unsigned(RPC_REPLY);
259 rpc_vers = txdr_unsigned(RPC_VER2);
260 rpc_call = txdr_unsigned(RPC_CALL);
262 rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
263 rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
264 rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
265 rpc_autherr = txdr_unsigned(RPC_AUTHERR);
266 rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
267 rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
270 /* initialize rpctask queue */
271 TAILQ_INIT(&rpctask_q);
274 /* initialize pools */
275 pool_init(&rpctask_pool, sizeof(struct rpctask), 0, 0, RPCTASKPOOL_LWM,
277 pool_setlowat(&rpctask_pool, RPCTASKPOOL_LWM);
278 pool_sethiwat(&rpctask_pool, RPCTASKPOOL_HWM);
280 pool_init(&rpcclnt_pool, sizeof(struct rpcclnt), 0, 0, 1, "rpcclnt_p", NULL);
282 /* initialize timers */
283 timeout_set(&rpcclnt_timer_to, rpcclnt_timer, &rpcclnt_timer_to);
284 rpcclnt_timer(&rpcclnt_timer_to);
285 #else /* !__OpenBSD__ */
286 callout_init(&rpcclnt_callout, 0);
287 #endif /* !__OpenBSD__ */
289 RPCDEBUG("rpc initialed");
298 /* XXX delete sysctl variables? */
299 callout_stop(&rpcclnt_callout);
/*
 * rpcclnt_setup: record the caller-supplied program description, server
 * address, transport type/protocol, authentication handle, transfer-size
 * limits and flags in the client handle 'clnt'.
 * NOTE(review): this is a sparse extract -- several original lines
 * (remaining K&R parameter declarations, braces, error returns) are
 * missing from this view; do not treat the span as compilable as-is.
 */
303 rpcclnt_setup(clnt, program, addr, sotype, soproto, auth, max_read_size, max_write_size, flags)
304 struct rpcclnt * clnt;
305 struct rpc_program * program;
306 struct sockaddr * addr;
309 struct rpc_auth * auth;
/* refuse a call with any required argument missing */
314 if (clnt == NULL || program == NULL || addr == NULL || auth == NULL)
/* a program must at least carry a name (used in log messages) */
317 if (program->prog_name == NULL)
/* stash the configuration verbatim; no copies are made, so the caller
 * must keep 'program', 'addr' and 'auth' alive for the handle's lifetime */
319 clnt->rc_prog = program;
321 clnt->rc_name = addr;
322 clnt->rc_sotype = sotype;
323 clnt->rc_soproto = soproto;
324 clnt->rc_auth = auth;
325 clnt->rc_rsize = max_read_size;
326 clnt->rc_wsize = max_write_size;
327 clnt->rc_flag = flags;
/* no per-procedure timing table until the caller installs one */
329 clnt->rc_proctlen = 0;
330 clnt->rc_proct = NULL;
336 * Initialize sockets and congestion for a new RPC connection. We do not free
337 * the sockaddr if error.
/*
 * rpcclnt_connect: create and configure the socket for a client handle.
 * Allocates a socket of the configured family/type/protocol, binds it to
 * a reserved low port for AF_INET (to avoid filehandle disclosure via
 * UDP port capture), optionally connects it, sets socket-buffer timeouts
 * and reservations, and seeds the congestion/RTT state.
 * NOTE(review): sparse extract -- the #if/#else markers separating the
 * OpenBSD (mbuf-option) and FreeBSD (struct sockopt) variants, plus many
 * declarations and error paths, are missing from this view.
 */
340 rpcclnt_connect(rpc, td)
345 int s, error, rcvreserve, sndreserve;
346 struct sockaddr *saddr;
349 struct sockaddr_in *sin;
352 struct sockaddr_in sin;
359 RPCDEBUG("no rpcclnt struct!\n");
363 /* create the socket */
366 saddr = rpc->rc_name;
368 error = socreate(saddr->sa_family, &rpc->rc_so, rpc->rc_sotype,
369 rpc->rc_soproto, td->td_ucred, td);
371 RPCDEBUG("error %d in socreate()", error);
375 rpc->rc_soflags = so->so_proto->pr_flags;
378 * Some servers require that the client port be a reserved port
379 * number. We always allocate a reserved port, as this prevents
380 * filehandle disclosure through UDP port capture.
382 if (saddr->sa_family == AF_INET) {
/* OpenBSD-style option: pass IP_PORTRANGE_LOW in an mbuf */
389 MGET(mopt, M_TRYWAIT, MT_SOOPTS);
390 mopt->m_len = sizeof(int);
391 ip = mtod(mopt, int *);
392 *ip = IP_PORTRANGE_LOW;
394 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
/* FreeBSD-style option: same request via struct sockopt */
396 soarg = IP_PORTRANGE_LOW;
397 bzero(&opt, sizeof(struct sockopt));
398 opt.sopt_dir = SOPT_SET;
399 opt.sopt_level = IPPROTO_IP;
400 opt.sopt_name = IP_PORTRANGE;
401 opt.sopt_val = &soarg;
402 opt.sopt_valsize = sizeof(soarg);
404 error = sosetopt(so, &opt);
/* bind to INADDR_ANY, port 0: kernel picks a low (reserved) port */
410 MGET(m, M_TRYWAIT, MT_SONAME);
411 sin = mtod(m, struct sockaddr_in *);
412 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
413 sin->sin_family = AF_INET;
414 sin->sin_addr.s_addr = INADDR_ANY;
415 sin->sin_port = htons(0);
416 error = sobind(so, m);
419 sin.sin_len = sizeof(struct sockaddr_in);
420 sin.sin_family = AF_INET;
421 sin.sin_addr.s_addr = INADDR_ANY;
422 sin.sin_port = htons(0);
424 * &thread0 gives us root credentials to ensure sobind
425 * will give us a reserved ephemeral port.
427 error = sobind(so, (struct sockaddr *) & sin, &thread0);
/* restore the default port range after the bind */
433 MGET(mopt, M_TRYWAIT, MT_SOOPTS);
434 mopt->m_len = sizeof(int);
435 ip = mtod(mopt, int *);
436 *ip = IP_PORTRANGE_DEFAULT;
437 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
439 soarg = IP_PORTRANGE_DEFAULT;
440 bzero(&opt, sizeof(struct sockopt));
441 opt.sopt_dir = SOPT_SET;
442 opt.sopt_level = IPPROTO_IP;
443 opt.sopt_name = IP_PORTRANGE;
444 opt.sopt_val = &soarg;
445 opt.sopt_valsize = sizeof(soarg);
446 error = sosetopt(so, &opt);
452 * Protocols that do not require connections may be optionally left
453 * unconnected for servers that reply from a port other than
456 if (rpc->rc_flag & RPCCLNT_NOCONN) {
457 if (rpc->rc_soflags & PR_CONNREQUIRED) {
462 error = soconnect(so, saddr, td);
467 * Wait for the connection to complete. Cribbed from the
468 * connect system call but with the wait timing out so that
469 * interruptible mounts don't hang here for a long time.
476 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
477 (void)tsleep((caddr_t) & so->so_timeo, PSOCK,
481 * XXX needs to catch interrupt signals. something
482 * like this: if ((so->so_state & SS_ISCONNECTING) &&
483 * so->so_error == 0 && rep && (error =
484 * nfs_sigintr(nmp, rep, rep->r_td)) != 0) {
485 * so->so_state &= ~SS_ISCONNECTING; splx(s); goto
490 error = so->so_error;
/* soft/interruptible mounts: cap socket-buffer waits at 5 seconds */
497 if (rpc->rc_flag & (RPCCLNT_SOFT | RPCCLNT_INT)) {
498 so->so_rcv.sb_timeo = (5 * hz);
499 so->so_snd.sb_timeo = (5 * hz);
501 so->so_rcv.sb_timeo = 0;
502 so->so_snd.sb_timeo = 0;
/* size the socket buffers for the transport type */
506 if (rpc->rc_sotype == SOCK_DGRAM) {
507 sndreserve = rpc->rc_wsize + RPC_MAXPKTHDR;
508 rcvreserve = rpc->rc_rsize + RPC_MAXPKTHDR;
509 } else if (rpc->rc_sotype == SOCK_SEQPACKET) {
510 sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR) * 2;
511 rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR) * 2;
513 if (rpc->rc_sotype != SOCK_STREAM)
514 panic("rpcclnt_connect() bad sotype");
515 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
/* keep idle TCP connections alive */
517 MGET(m, M_TRYWAIT, MT_SOOPTS);
518 *mtod(m, int32_t *) = 1;
519 m->m_len = sizeof(int32_t);
520 sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
524 bzero(&opt, sizeof(struct sockopt));
525 opt.sopt_dir = SOPT_SET;
526 opt.sopt_level = SOL_SOCKET;
527 opt.sopt_name = SO_KEEPALIVE;
528 opt.sopt_val = &soarg;
529 opt.sopt_valsize = sizeof(soarg);
/* disable Nagle: RPC is request/response, latency matters */
533 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
535 MGET(m, M_TRYWAIT, MT_SOOPTS);
536 *mtod(m, int32_t *) = 1;
537 m->m_len = sizeof(int32_t);
538 sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
542 bzero(&opt, sizeof(struct sockopt));
543 opt.sopt_dir = SOPT_SET;
544 opt.sopt_level = IPPROTO_TCP;
545 opt.sopt_name = TCP_NODELAY;
546 opt.sopt_val = &soarg;
547 opt.sopt_valsize = sizeof(soarg);
/* stream transports also carry the 4-byte record mark per message */
551 sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR +
552 sizeof(u_int32_t)) * 2;
553 rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR +
554 sizeof(u_int32_t)) * 2;
556 error = soreserve(so, sndreserve, rcvreserve);
559 so->so_rcv.sb_flags |= SB_NOINTR;
560 so->so_snd.sb_flags |= SB_NOINTR;
562 /* Initialize other non-zero congestion variables */
563 rpc->rc_srtt[0] = rpc->rc_srtt[1] = rpc->rc_srtt[2] =
564 rpc->rc_srtt[3] = (RPC_TIMEO << 3);
565 rpc->rc_sdrtt[0] = rpc->rc_sdrtt[1] = rpc->rc_sdrtt[2] =
566 rpc->rc_sdrtt[3] = 0;
567 rpc->rc_cwnd = RPC_MAXCWND / 2; /* Initial send window */
569 rpc->rc_timeouts = 0;
/* error path: tear the socket down again (comment says sockaddr is kept) */
573 rpcclnt_disconnect(rpc);
580 * Called when a connection is broken on a reliable protocol.
581 * - clean up the old socket
582 * - rpcclnt_connect() again
583 * - set R_MUSTRESEND for all outstanding requests on mount point
584 * If this fails the mount point is DEAD!
585 * nb: Must be called with the rpcclnt_sndlock() set on the mount point.
/*
 * rpcclnt_reconnect: rebuild a broken connection-oriented transport.
 * Disconnects, then retries rpcclnt_connect() until it succeeds or the
 * wait is interrupted, sleeping one lbolt tick between attempts; finally
 * marks every outstanding task on this client for retransmission.
 * Must be called with the send lock held (per the comment above in file).
 * NOTE(review): sparse extract -- declarations, the EINTR/ERESTART
 * return and closing braces are missing from this view.
 */
588 rpcclnt_reconnect(rep, td)
593 struct rpcclnt *rpc = rep->r_rpcclnt;
596 rpcclnt_disconnect(rpc);
597 while ((error = rpcclnt_connect(rpc, td)) != 0) {
/* give up on interrupt; otherwise back off for a tick and retry */
598 if (error == EINTR || error == ERESTART)
600 tsleep(&lbolt, PSOCK, "rpccon", 0);
604 * Loop through outstanding request list and fix up all requests on
/* flag each task owned by this client so it gets resent on the new socket */
607 for (rp = TAILQ_FIRST(&rpctask_q); rp != NULL;
608 rp = TAILQ_NEXT(rp, r_chain)) {
609 if (rp->r_rpcclnt == rpc)
610 rp->r_flags |= R_MUSTRESEND;
616 * RPC transport disconnect. Clean up and unlink.
619 rpcclnt_disconnect(rpc)
/*
 * rpcclnt_safedisconnect: disconnect while excluding concurrent
 * receivers.  A zeroed stack-local dummy task is used purely to take the
 * receive lock (rpcclnt_rcvlock keys off task->r_rpcclnt), so no real
 * request can be mid-receive while the socket is torn down.
 * NOTE(review): sparse extract -- the enclosing braces are missing.
 */
633 rpcclnt_safedisconnect(struct rpcclnt * rpc)
635 struct rpctask dummytask;
637 bzero(&dummytask, sizeof(dummytask));
638 dummytask.r_rpcclnt = rpc;
639 rpcclnt_rcvlock(&dummytask);
640 rpcclnt_disconnect(rpc);
641 rpcclnt_rcvunlock(&rpc->rc_flag);
645 * This is the rpc send routine. For connection based socket types, it
646 * must be called with an rpcclnt_sndlock() on the socket.
647 * "rep == NULL" indicates that it has been called from a server.
648 * For the client side:
649 * - return EINTR if the RPC is terminated, 0 otherwise
650 * - set R_MUSTRESEND if the send fails for any reason
651 * - do any cleanup required by recoverable socket errors (?)
652 * For the server side:
653 * - return EINTR or ERESTART if interrupted by a signal
654 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
655 * - do any cleanup required by recoverable socket errors (?)
/*
 * rpcclnt_send: transmit one RPC message ('top') on socket 'so', to
 * 'nam' for unconnected datagram sockets.  Per the contract above in the
 * file: returns EINTR if the task was soft-terminated, sets R_MUSTRESEND
 * on recoverable failures, and suppresses soft socket errors.
 * NOTE(review): sparse extract -- parameter declarations, the #if
 * branches choosing mbuf vs sockaddr address form, and several returns
 * are missing from this view.
 */
658 rpcclnt_send(so, nam, top, rep)
663 struct sockaddr *nam;
669 struct mbuf *sendnam;
671 struct sockaddr *sendnam;
672 struct thread *td = curthread;
674 int error, soflags, flags;
/* task already terminated: do not bother sending */
677 if (rep->r_flags & R_SOFTTERM) {
/* no socket (reconnect in progress?) -> queue for retransmission */
681 if ((so = rep->r_rpcclnt->rc_so) == NULL) {
682 rep->r_flags |= R_MUSTRESEND;
686 rep->r_flags &= ~R_MUSTRESEND;
687 soflags = rep->r_rpcclnt->rc_soflags;
689 soflags = so->so_proto->pr_flags;
/* connected (or connection-required) sockets need no destination addr */
691 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
696 if (so->so_type == SOCK_SEQPACKET)
702 * XXXRW: If/when this code becomes MPSAFE itself, Giant might have
703 * to be conditionally acquired earlier for the stack so has to avoid
704 * lock order reversals with any locks held over rpcclnt_send().
706 error = sosend(so, sendnam, NULL, top, NULL, flags, td);
709 log(LOG_INFO, "rpc send error %d for service %s\n", error,
710 rep->r_rpcclnt->rc_prog->prog_name);
712 * Deal with errors for the client side.
714 if (rep->r_flags & R_SOFTTERM)
/* recoverable failure: the timer will retransmit */
717 rep->r_flags |= R_MUSTRESEND;
719 log(LOG_INFO, "rpc service send error %d\n", error);
722 * Handle any recoverable (soft) socket errors here.
/* swallow everything except hard interruption/broken-pipe errors */
724 if (error != EINTR && error != ERESTART &&
725 error != EWOULDBLOCK && error != EPIPE)
732 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all done by
733 * soreceive(), but for SOCK_STREAM we must deal with the Record Mark and
734 * consolidate the data into a new mbuf list. nb: Sometimes TCP passes the
735 * data up to soreceive() in long lists of small mbufs. For SOCK_STREAM we
736 * must be very careful to read an entire record once we have read any of it,
737 * even if the system call has been interrupted.
/*
 * rpcclnt_receive: pull one RPC reply off the socket into '*mp'.
 * SOCK_DGRAM: a single soreceive().  SOCK_STREAM: read the 4-byte Sun
 * RPC record mark first, then exactly that many bytes, retrying through
 * EWOULDBLOCK/EINTR so a partially-read record is always completed.
 * Handles retransmission (R_MUSTRESEND) and reconnection under the send
 * lock for connection-oriented transports.
 * NOTE(review): sparse extract -- declarations (auio, aio, m, len, so),
 * the do{ openers matched by the }while lines, gotos and error labels
 * are missing from this view.
 */
740 rpcclnt_receive(rep, aname, mp, td)
745 struct sockaddr **aname;
754 struct mbuf *control;
757 struct mbuf **getnam;
759 struct sockaddr **getnam;
761 int error, sotype, rcvflg;
764 * Set up arguments for soreceive()
768 sotype = rep->r_rpcclnt->rc_sotype;
771 * For reliable protocols, lock against other senders/receivers in
772 * case a reconnect is necessary. For SOCK_STREAM, first get the
773 * Record Mark to find out how much more there is to get. We must
774 * lock the socket against other receivers until we have an entire
777 if (sotype != SOCK_DGRAM) {
778 error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
783 * Check for fatal errors and resending request.
786 * Ugh: If a reconnect attempt just happened, rc_so would
787 * have changed. NULL indicates a failed attempt that has
788 * essentially shut down this mount point.
790 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
791 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
794 so = rep->r_rpcclnt->rc_so;
796 error = rpcclnt_reconnect(rep, td);
798 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
/* retransmit a copy of the request until the resend flag clears */
803 while (rep->r_flags & R_MUSTRESEND) {
804 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_TRYWAIT);
805 rpcstats.rpcretries++;
806 error = rpcclnt_send(so, rep->r_rpcclnt->rc_name, m, rep);
808 if (error == EINTR || error == ERESTART ||
809 (error = rpcclnt_reconnect(rep, td)) != 0) {
810 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
816 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
817 if (sotype == SOCK_STREAM) {
/* first read: the 32-bit record mark (length + last-fragment bit) */
818 aio.iov_base = (caddr_t) & len;
819 aio.iov_len = sizeof(u_int32_t);
822 auio.uio_segflg = UIO_SYSSPACE;
823 auio.uio_rw = UIO_READ;
825 auio.uio_resid = sizeof(u_int32_t);
832 rcvflg = MSG_WAITALL;
833 error = soreceive(so, NULL, &auio, NULL, NULL, &rcvflg);
834 if (error == EWOULDBLOCK && rep) {
835 if (rep->r_flags & R_SOFTTERM)
838 } while (error == EWOULDBLOCK);
839 if (!error && auio.uio_resid > 0) {
841 "short receive (%zu/%zu) from rpc server %s\n",
842 sizeof(u_int32_t) - auio.uio_resid,
844 rep->r_rpcclnt->rc_prog->prog_name);
/* strip the last-fragment bit; the rest is the record length */
849 len = ntohl(len) & ~0x80000000;
851 * This is SERIOUS! We are out of sync with the
852 * sender and forcing a disconnect/reconnect is all I
855 if (len > RPC_MAXPACKET) {
856 log(LOG_ERR, "%s (%d) from rpc server %s\n",
857 "impossible packet length",
859 rep->r_rpcclnt->rc_prog->prog_name);
/* second read: the record body itself, in full (MSG_WAITALL) */
863 auio.uio_resid = len;
865 rcvflg = MSG_WAITALL;
866 error = soreceive(so, NULL, &auio, mp, NULL, &rcvflg);
867 } while (error == EWOULDBLOCK || error == EINTR ||
869 if (!error && auio.uio_resid > 0) {
871 "short receive (%d/%d) from rpc server %s\n",
872 len - auio.uio_resid, len,
873 rep->r_rpcclnt->rc_prog->prog_name);
878 * NB: Since uio_resid is big, MSG_WAITALL is ignored
879 * and soreceive() will return when it has either a
880 * control msg or a data msg. We have no use for
881 * control msg., but must grab them and then throw
882 * them away so we know what is going on.
884 auio.uio_resid = len = 100000000; /* Anything Big */
892 error = soreceive(so, NULL, &auio, mp, &control, &rcvflg);
895 if (error == EWOULDBLOCK && rep) {
896 if (rep->r_flags & R_SOFTTERM)
899 } while (error == EWOULDBLOCK ||
900 (!error && *mp == NULL && control));
901 if ((rcvflg & MSG_EOR) == 0)
903 if (!error && *mp == NULL)
905 len -= auio.uio_resid;
/* hard receive error: drop the partial mbuf chain and reconnect */
908 if (error && error != EINTR && error != ERESTART) {
910 *mp = (struct mbuf *) 0;
913 "receive error %d from rpc server %s\n",
915 rep->r_rpcclnt->rc_prog->prog_name);
916 error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
918 error = rpcclnt_reconnect(rep, td);
/* SOCK_DGRAM path: one receive, no record mark */
923 if ((so = rep->r_rpcclnt->rc_so) == NULL)
925 if (so->so_state & SS_ISCONNECTED)
929 auio.uio_resid = len = 1000000;
938 error = soreceive(so, getnam, &auio, mp, NULL, &rcvflg);
939 RPCDEBUG("soreceive returns %d", error);
940 if (error == EWOULDBLOCK && (rep->r_flags & R_SOFTTERM)) {
941 RPCDEBUG("wouldblock && softerm -> EINTR");
944 } while (error == EWOULDBLOCK);
945 len -= auio.uio_resid;
952 * Search for any mbufs that are not a multiple of 4 bytes
953 * long or with m_data not longword aligned. These could
954 * cause pointer alignment problems, so copy them to well
957 rpcclnt_realign(mp, 5 * RPCX_UNSIGNED);
964 * Implement receipt of reply on a socket. We must search through the list of
965 * received datagrams matching them with outstanding requests using the xid,
966 * until ours is found.
/*
 * rpcclnt_reply: wait for the reply matching 'myrep'.  Holds the
 * receive lock, pulls replies off the socket via rpcclnt_receive(),
 * matches each reply's XID against the outstanding task queue, updates
 * the congestion window and the smoothed RTT estimators on a match, and
 * loops until our own reply arrives (or error).
 * NOTE(review): sparse extract -- the outer for(;;) loop, rxid
 * extraction, goto labels and several declarations are missing from
 * this view.
 */
970 rpcclnt_reply(myrep, td)
971 struct rpctask *myrep;
975 struct rpcclnt *rpc = myrep->r_rpcclnt;
977 struct mbuf *mrep, *md;
981 struct sockaddr *nam;
988 * Loop around until we get our own reply
992 * Lock against other receivers so that I don't get stuck in
993 * sbwait() after someone else has received my reply for me.
994 * Also necessary for connection based protocols to avoid
995 * race conditions during a reconnect.
997 error = rpcclnt_rcvlock(myrep);
1000 /* Already received, bye bye */
1001 if (myrep->r_mrep != NULL) {
1002 rpcclnt_rcvunlock(&rpc->rc_flag);
1006 * Get the next Rpc reply off the socket
1008 error = rpcclnt_receive(myrep, &nam, &mrep, td);
1010 rpcclnt_rcvunlock(&rpc->rc_flag);
1014 * Ignore routing errors on connectionless
1017 if (RPCIGNORE_SOERROR(rpc->rc_soflags, error)) {
1018 rpc->rc_so->so_error = 0;
1019 if (myrep->r_flags & R_GETONEREP)
1021 RPCDEBUG("ingoring routing error on connectionless protocol.");
1031 FREE(nam, M_SONAME);
1035 * Get the xid and check that it is an rpc reply
1038 dpos = mtod(md, caddr_t);
1039 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
/* second word of the header must say REPLY, else drop the datagram */
1041 if (*tl != rpc_reply) {
1042 rpcstats.rpcinvalid++;
1045 if (myrep->r_flags & R_GETONEREP)
1050 * Loop through the request list to match up the reply Iff no
1051 * match, just drop the datagram
1053 TAILQ_FOREACH(rep, &rpctask_q, r_chain) {
1054 if (rep->r_mrep == NULL && rxid == rep->r_xid) {
1061 * Update congestion window. Do the additive
1062 * increase of one rpc/rtt.
1064 if (rpc->rc_cwnd <= rpc->rc_sent) {
1066 (RPC_CWNDSCALE * RPC_CWNDSCALE +
1067 (rpc->rc_cwnd >> 1)) / rpc->rc_cwnd;
1068 if (rpc->rc_cwnd > RPC_MAXCWND)
1069 rpc->rc_cwnd = RPC_MAXCWND;
1071 rep->r_flags &= ~R_SENT;
1072 rpc->rc_sent -= RPC_CWNDSCALE;
1074 * Update rtt using a gain of 0.125 on the
1075 * mean and a gain of 0.25 on the deviation.
1077 if (rep->r_flags & R_TIMING) {
1079 * Since the timer resolution of
1080 * NFS_HZ is so course, it can often
1081 * result in r_rtt == 0. Since r_rtt
1082 * == N means that the actual rtt is
1083 * between N+dt and N+2-dt ticks, add
1086 t1 = rep->r_rtt + 1;
1087 t1 -= (RPC_SRTT(rpc, rep) >> 3);
1088 RPC_SRTT(rpc, rep) += t1;
1091 t1 -= (RPC_SDRTT(rpc, rep) >> 2);
1092 RPC_SDRTT(rpc, rep) += t1;
/* a reply arrived, so the server is responsive again */
1094 rpc->rc_timeouts = 0;
1099 * If not matched to a request, drop it. If it's mine, get
1103 rpcstats.rpcunexpected++;
1104 RPCDEBUG("rpc reply not matched\n");
1106 } else if (rep == myrep) {
1107 if (rep->r_mrep == NULL)
1108 panic("rpcreply nil");
1111 if (myrep->r_flags & R_GETONEREP)
1116 /* XXX: ignores tryagain! */
1118 * code from nfs_request - goes something like this
1119 * - fill in task struct
1120 * - links task into list
1121 * - calls rpcclnt_send() for first transmit
1122 * - calls rpcclnt_reply() to get reply
1123 * - fills in reply (which should be initialized prior to
1124 * calling), which is valid when 0 is returned and is
1125 * NEVER freed in this function
1127 * nb: always frees the request header, but NEVER frees 'mrest'
1129 * rpcclnt_setauth() should be used before calling this. EAUTH is returned if
1130 * authentication fails.
1132 * note that reply->result_* are invalid unless reply->type ==
1133 * RPC_MSGACCEPTED and reply->status == RPC_SUCCESS and that reply->verf_*
1134 * are invalid unless reply->type == RPC_MSGACCEPTED
/*
 * rpcclnt_request: perform one client-side RPC.  Allocates a task,
 * builds the RPC header around 'mrest' (which is never freed here),
 * queues the task, sends it (subject to the congestion window), waits
 * for the reply via rpcclnt_reply(), then parses the reply header into
 * 'reply'.  Per the comment above in the file: the request header is
 * always freed, 'mrest' never is, and reply->result_* / verf_* fields
 * are only valid for the documented type/status combinations.
 * NOTE(review): sparse extract -- #if branches (pool vs MALLOC),
 * several labels, declarations (xid, dpos, tl) and the final returns
 * are missing from this view.
 */
1137 rpcclnt_request(rpc, mrest, procnum, td, cred, reply)
1138 struct rpcclnt *rpc;
1143 struct rpc_reply *reply;
1145 struct mbuf *m, *mrep;
1146 struct rpctask *task;
1148 struct mbuf *md, *mheadend;
1150 int t1, s, error = 0, mrest_len;
/* allocate the per-request task (pool on OpenBSD, malloc elsewhere) */
1154 task = pool_get(&rpctask_pool, PR_WAITOK);
1156 MALLOC(task, struct rpctask *, sizeof(struct rpctask), M_RPC, (M_WAITOK | M_ZERO));
1159 task->r_rpcclnt = rpc;
1160 task->r_procnum = procnum;
1163 mrest_len = m_length(mrest, NULL);
1165 m = rpcclnt_buildheader(rpc, procnum, mrest, mrest_len, &xid, &mheadend,
1168 * This can happen if the auth_type is neither UNIX or NULL
1172 pool_put(&rpctask_pool, task);
1176 error = EPROTONOSUPPORT;
1181 * For stream protocols, insert a Sun RPC Record Mark.
1183 if (rpc->rc_sotype == SOCK_STREAM) {
/* record mark = last-fragment bit | payload length (excl. the mark) */
1184 M_PREPEND(m, RPCX_UNSIGNED, M_TRYWAIT);
1185 *mtod(m, u_int32_t *) = htonl(0x80000000 |
1186 (m->m_pkthdr.len - RPCX_UNSIGNED));
/* soft mounts retry a bounded number of times; hard mounts effectively forever */
1191 if (rpc->rc_flag & RPCCLNT_SOFT)
1192 task->r_retry = rpc->rc_retry;
1194 task->r_retry = RPC_MAXREXMIT + 1; /* past clip limit */
1195 task->r_rtt = task->r_rexmit = 0;
/* only procedures with a timing-table entry get RTT measurement */
1197 if (rpcclnt_proct(rpc, procnum) > 0)
1198 task->r_flags = R_TIMING;
1201 task->r_mrep = NULL;
1204 * Do the client side RPC.
1206 rpcstats.rpcrequests++;
1209 * Chain request into list of outstanding requests. Be sure to put it
1210 * LAST so timer finds oldest requests first.
/* (re)arm the retransmit timer when the queue goes non-empty */
1213 if (TAILQ_EMPTY(&rpctask_q))
1214 callout_reset(&rpcclnt_callout, rpcclnt_ticks, rpcclnt_timer,
1216 TAILQ_INSERT_TAIL(&rpctask_q, task, r_chain);
1219 * If backing off another request or avoiding congestion, don't send
1220 * this one now but let timer do it. If not timing a request, do it
1223 if (rpc->rc_so && (rpc->rc_sotype != SOCK_DGRAM ||
1224 (rpc->rc_flag & RPCCLNT_DUMBTIMR) ||
1225 rpc->rc_sent < rpc->rc_cwnd)) {
1228 if (rpc->rc_soflags & PR_CONNREQUIRED)
1229 error = rpcclnt_sndlock(&rpc->rc_flag, task);
/* send a copy -- the original is kept for retransmission */
1231 error = rpcclnt_send(rpc->rc_so, rpc->rc_name,
1232 m_copym(m, 0, M_COPYALL, M_TRYWAIT),
1234 if (rpc->rc_soflags & PR_CONNREQUIRED)
1235 rpcclnt_sndunlock(&rpc->rc_flag);
1237 if (!error && (task->r_flags & R_MUSTRESEND) == 0) {
1238 rpc->rc_sent += RPC_CWNDSCALE;
1239 task->r_flags |= R_SENT;
1247 * Wait for the reply from our send or the timer's.
1249 if (!error || error == EPIPE)
1250 error = rpcclnt_reply(task, td);
1253 * RPC done, unlink the request.
1256 TAILQ_REMOVE(&rpctask_q, task, r_chain);
1257 if (TAILQ_EMPTY(&rpctask_q))
1258 callout_stop(&rpcclnt_callout);
1262 * Decrement the outstanding request count.
1264 if (task->r_flags & R_SENT) {
1265 task->r_flags &= ~R_SENT; /* paranoia */
1266 rpc->rc_sent -= RPC_CWNDSCALE;
1269 * If there was a successful reply and a tprintf msg. tprintf a
1272 if (!error && (task->r_flags & R_TPRINTFMSG)) {
1274 rpcclnt_msg(task->r_td, rpc->rc_prog->prog_name,
1279 /* free request header (leaving mrest) */
1280 mheadend->m_next = NULL;
1281 m_freem(task->r_mreq);
1283 /* initialize reply */
1284 reply->mrep = task->r_mrep;
1285 reply->verf_md = NULL;
1286 reply->result_md = NULL;
1288 mrep = task->r_mrep;
1290 dpos = task->r_dpos;
1292 /* task structure is no longer needed */
1294 pool_put(&rpctask_pool, task);
1303 * break down the rpc header and check if ok
1306 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1307 reply->stat.type = fxdr_unsigned(u_int32_t, *tl);
1309 if (reply->stat.type == RPC_MSGDENIED) {
1310 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1311 reply->stat.status = fxdr_unsigned(u_int32_t, *tl);
1313 switch (reply->stat.status) {
/* RPC version mismatch: record the server's supported range */
1315 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1316 reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++);
1317 reply->stat.mismatch_info.high = fxdr_unsigned(u_int32_t, *tl);
/* authentication rejection: record the auth error code */
1321 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1322 reply->stat.autherr = fxdr_unsigned(u_int32_t, *tl);
1330 } else if (reply->stat.type != RPC_MSGACCEPTED) {
/* accepted: verifier flavor + opaque length precede the status word */
1335 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1337 reply->verf_md = md;
1338 reply->verf_dpos = dpos;
1340 reply->verf_type = fxdr_unsigned(u_int32_t, *tl++);
1341 reply->verf_size = fxdr_unsigned(u_int32_t, *tl);
1343 if (reply->verf_size != 0)
1344 rpcm_adv(rpcm_rndup(reply->verf_size));
1346 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1347 reply->stat.status = fxdr_unsigned(u_int32_t, *tl);
1349 if (reply->stat.status == RPC_SUCCESS) {
/* results may start exactly at an mbuf boundary; step to the next mbuf */
1350 if ((uint32_t)(dpos - mtod(md, caddr_t)) >= md->m_len) {
1351 RPCDEBUG("where is the next mbuf?");
1352 RPCDEBUG("%d -> %d",
1353 (int)(dpos - mtod(md, caddr_t)), md->m_len);
1354 if (md->m_next == NULL) {
1358 reply->result_md = md->m_next;
1359 reply->result_dpos = mtod(reply->result_md,
1363 reply->result_md = md;
1364 reply->result_dpos = dpos;
1366 } else if (reply->stat.status == RPC_PROGMISMATCH) {
1367 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1368 reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++);
1369 reply->stat.mismatch_info.high = fxdr_unsigned(u_int32_t, *tl);
1373 error = EPROTONOSUPPORT;
1385 * Scan the rpctask list and retransmit any requests that have timed out.
1386 * To avoid retransmission attempts on STREAM sockets (in the future) make
1387 * sure to set the r_retry field to 0 (implies nm_retry == 0).
/* NOTE(review): the function header and several interior lines are elided
 * from this view; the comments below describe only the visible code. */
1394 struct timeout *to = (struct timeout *) arg;
1396 struct rpctask *rep;
1399 struct rpcclnt *rpc;
1404 struct thread *td = curthread;
/* Giant is taken because rpcclnt_msg() ends up in tprintf(). */
1412 mtx_lock(&Giant); /* rpc_msg -> tprintf */
1413 TAILQ_FOREACH(rep, &rpctask_q, r_chain) {
1414 rpc = rep->r_rpcclnt;
/* Skip tasks that already got a reply or were soft-terminated. */
1415 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
/* A pending signal on an interruptible mount terminates the task. */
1417 if (rpcclnt_sigintr(rpc, rep, rep->r_td)) {
1418 rep->r_flags |= R_SOFTTERM;
/*
 * Compute the retransmit timeout: a fixed value for "dumb" timer
 * mode, otherwise an RTT-derived one, scaled by the backoff table
 * once previous timeouts have occurred.
 */
1421 if (rep->r_rtt >= 0) {
1423 if (rpc->rc_flag & RPCCLNT_DUMBTIMR)
1424 timeo = rpc->rc_timeo;
1426 timeo = RPC_RTO(rpc, rpcclnt_proct(rep->r_rpcclnt,
1428 if (rpc->rc_timeouts > 0)
1429 timeo *= rpcclnt_backoff[rpc->rc_timeouts - 1];
1430 if (rep->r_rtt <= timeo)
1432 if (rpc->rc_timeouts < 8)
1436 * Check for server not responding
/* Print the "not responding" console message only once per task. */
1438 if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
1439 rep->r_rexmit > rpc->rc_deadthresh) {
1440 rpcclnt_msg(rep->r_td, rpc->rc_prog->prog_name,
1442 rep->r_flags |= R_TPRINTFMSG;
/* Retry budget exhausted: count a timeout and soft-terminate. */
1444 if (rep->r_rexmit >= rep->r_retry) { /* too many */
1445 rpcstats.rpctimeouts++;
1446 rep->r_flags |= R_SOFTTERM;
/* Non-datagram sockets only bump the rexmit counter; no resend here. */
1449 if (rpc->rc_sotype != SOCK_DGRAM) {
1450 if (++rep->r_rexmit > RPC_MAXREXMIT)
1451 rep->r_rexmit = RPC_MAXREXMIT;
1454 if ((so = rpc->rc_so) == NULL)
1458 * If there is enough space and the window allows.. Resend it
1459 * Set r_rtt to -1 in case we fail to send it now.
/*
 * Resend only when the socket send buffer and the congestion window
 * allow it; m_copym() duplicates the request mbuf chain so the
 * original stays queued for further retries.
 */
1462 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
1463 ((rpc->rc_flag & RPCCLNT_DUMBTIMR) ||
1464 (rep->r_flags & R_SENT) ||
1465 rpc->rc_sent < rpc->rc_cwnd) &&
1466 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) {
/* Connected sockets pass no address; unconnected ones pass rc_name. */
1467 if ((rpc->rc_flag & RPCCLNT_NOCONN) == 0)
1468 error = (*so->so_proto->pr_usrreqs->pru_send) (so, 0, m,
1471 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, rpc->rc_name, NULL, td);
1473 if (RPCIGNORE_SOERROR(rpc->rc_soflags, error))
1477 * Iff first send, start timing else turn
1478 * timing off, backoff timer and divide
1479 * congestion window by 2.
1481 if (rep->r_flags & R_SENT) {
1482 rep->r_flags &= ~R_TIMING;
1483 if (++rep->r_rexmit > RPC_MAXREXMIT)
1484 rep->r_rexmit = RPC_MAXREXMIT;
/* Clamp the halved congestion window to at least one slot. */
1486 if (rpc->rc_cwnd < RPC_CWNDSCALE)
1487 rpc->rc_cwnd = RPC_CWNDSCALE;
1488 rpcstats.rpcretries++;
/* First send of this request: charge it against the window. */
1490 rep->r_flags |= R_SENT;
1491 rpc->rc_sent += RPC_CWNDSCALE;
1497 mtx_unlock(&Giant); /* rpc_msg -> tprintf */
/* Re-arm the periodic timer: timeout_add() on the OpenBSD path,
 * callout_reset() on the FreeBSD path (selection is via elided #ifdefs). */
1501 timeout_add(rpcclnt_timer, to, rpcclnt_ticks);
1503 callout_reset(&rpcclnt_callout, rpcclnt_ticks, rpcclnt_timer, NULL);
1508 * Test for a termination condition pending on the process. This is used for
1509 * RPCCLNT_INT mounts.
/* Returns nonzero when the request should be aborted (signal pending or
 * task already soft-terminated); parameter lines are partly elided here. */
1512 rpcclnt_sigintr(rpc, task, pr)
1513 struct rpcclnt *rpc;
1514 struct rpctask *task;
1524 /* XXX deal with forced unmounts */
/* A soft-terminated task is always considered interrupted. */
1526 if (task && (task->r_flags & R_SOFTTERM))
/* Non-interruptible mounts never abort on signals. */
1529 if (!(rpc->rc_flag & RPCCLNT_INT))
/* OpenBSD-style check: any deliverable, non-ignored pending signal. */
1537 if (p && p->p_siglist &&
1538 (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
/* FreeBSD-style check: build the deliverable set under ps_mtx, then
 * test it against the RPC interrupt signal mask. */
1544 tmpset = p->p_siglist;
1545 SIGSETNAND(tmpset, pr->td_sigmask);
1546 mtx_lock(&p->p_sigacts->ps_mtx);
1547 SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
1548 mtx_unlock(&p->p_sigacts->ps_mtx);
1549 if (SIGNOTEMPTY(p->p_siglist) && RPCCLNTINT_SIGMASK(tmpset)) {
1559 * Lock a socket against others. Necessary for STREAM sockets to ensure you
1560 * get an entire rpc request/reply and also to avoid race conditions between
1561 * the processes with nfs requests in progress when a reconnect is necessary.
1564 rpcclnt_sndlock(flagp, task)
1566 struct rpctask *task;
1569 int slpflag = 0, slptimeo = 0;
/* Interruptible mounts presumably set slpflag = PCATCH on the elided
 * line so the tsleep below can be broken by signals — TODO confirm. */
1572 if (task->r_rpcclnt->rc_flag & RPCCLNT_INT)
/* Wait until no one else holds the send lock. */
1574 while (*flagp & RPCCLNT_SNDLOCK) {
1575 if (rpcclnt_sigintr(task->r_rpcclnt, task, p))
/* Record that someone wants the lock so the unlocker wakes us. */
1577 *flagp |= RPCCLNT_WANTSND;
1578 (void)tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcsndlck",
/* After one interrupted sleep, elided code switches to a timed,
 * non-catchable sleep (slptimeo) — standard NFS-style pattern. */
1580 if (slpflag == PCATCH) {
/* Lock is free: take it. */
1585 *flagp |= RPCCLNT_SNDLOCK;
1590 * Unlock the stream socket for others.
1593 rpcclnt_sndunlock(flagp)
/* Sanity check: releasing a lock we do not hold is a bug. */
1597 if ((*flagp & RPCCLNT_SNDLOCK) == 0)
1598 panic("rpc sndunlock");
1599 *flagp &= ~RPCCLNT_SNDLOCK;
/* Wake any thread that registered interest in the send lock. */
1600 if (*flagp & RPCCLNT_WANTSND) {
1601 *flagp &= ~RPCCLNT_WANTSND;
1602 wakeup((caddr_t) flagp);
/* Acquire the receive-side lock on the client's socket; mirrors
 * rpcclnt_sndlock() but keys on RPCCLNT_RCVLOCK/RPCCLNT_WANTRCV. */
1607 rpcclnt_rcvlock(task)
1608 struct rpctask *task;
1610 int *flagp = &task->r_rpcclnt->rc_flag;
/* slpflag has no initializer here; the elided branch below presumably
 * assigns it on both paths (PCATCH vs 0) — TODO confirm. */
1611 int slpflag, slptimeo = 0;
1613 if (*flagp & RPCCLNT_INT)
/* Wait until the receive lock is free. */
1617 while (*flagp & RPCCLNT_RCVLOCK) {
1618 if (rpcclnt_sigintr(task->r_rpcclnt, task, task->r_td))
1620 *flagp |= RPCCLNT_WANTRCV;
1621 (void)tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcrcvlk",
/* Same interrupted-sleep fallback as in rpcclnt_sndlock(). */
1623 if (slpflag == PCATCH) {
1628 *flagp |= RPCCLNT_RCVLOCK;
1633 * Unlock the stream socket for others.
1636 rpcclnt_rcvunlock(flagp)
1640 if ((*flagp & RPCCLNT_RCVLOCK) == 0)
/* NOTE(review): panic string still says "nfs" (inherited from the NFS
 * code this was derived from) while the send-side panic says "rpc". */
1641 panic("nfs rcvunlock");
1642 *flagp &= ~RPCCLNT_RCVLOCK;
/* Wake any thread waiting for the receive lock. */
1643 if (*flagp & RPCCLNT_WANTRCV) {
1644 *flagp &= ~RPCCLNT_WANTRCV;
1645 wakeup((caddr_t) flagp);
1651 * Check for badly aligned mbuf data areas and realign data in an mbuf list
1652 * by copying the data areas up, as required.
/* Older (K&R-style) realign variant; large parts are elided in this view. */
1655 rpcclnt_realign(m, hsiz)
1660 int siz, mlen, olen;
1666 * This never happens for UDP, rarely happens for TCP but
1667 * frequently happens for iso transport.
/* Misaligned if either the length or the data pointer is not 4-byte aligned. */
1669 if ((m->m_len & 0x3) || (mtod(m, long)&0x3)) {
1671 fcp = mtod(m, caddr_t);
1672 if ((long)fcp & 0x3) {
/* The data pointer is about to move, so any packet-header state
 * (including mbuf tags) would become invalid: strip it. */
1673 if (m->m_flags & M_PKTHDR)
1674 m_tag_delete_chain(m, NULL);
1675 m->m_flags &= ~M_PKTHDR;
/* Re-point m_data at an aligned offset in cluster or internal storage. */
1676 if (m->m_flags & M_EXT)
1677 m->m_data = m->m_ext.ext_buf +
1678 ((m->m_ext.ext_size - olen) & ~0x3);
1680 m->m_data = m->m_dat;
1683 tcp = mtod(m, caddr_t);
1688 * If possible, only put the first invariant part of
1689 * the RPC header in the first mbuf.
1691 mlen = M_TRAILINGSPACE(m);
1692 if (olen <= hsiz && mlen > hsiz)
1695 /* Loop through the mbuf list consolidating data. */
/* Each subsequent mbuf is reset the same way before data is copied in. */
1699 if (m2->m_flags & M_PKTHDR)
1700 m_tag_delete_chain(m2, NULL);
1701 m2->m_flags &= ~M_PKTHDR;
1702 if (m2->m_flags & M_EXT)
1703 m2->m_data = m2->m_ext.ext_buf;
1705 m2->m_data = m2->m_dat;
1707 mlen = M_TRAILINGSPACE(m2);
1708 tcp = mtod(m2, caddr_t);
/* Copy as much as fits in the destination or remains in the source. */
1712 siz = min(mlen, olen);
1714 bcopy(fcp, tcp, siz);
1724 fcp = mtod(m, caddr_t);
1729 * Finally, set m_len == 0 for any trailing mbufs
1730 * that have been copied out of.
/* Newer realign variant: rebuild misaligned runs of the chain into freshly
 * allocated mbufs via m_copyback() instead of shifting data in place.
 * Several interior lines (off bookkeeping, chain splice) are elided. */
1743 rpcclnt_realign(struct mbuf **pm, int hsiz)
1746 struct mbuf *n = NULL;
1749 RPCDEBUG("in rpcclnt_realign()");
1751 while ((m = *pm) != NULL) {
/* Misaligned if length or data pointer is not 4-byte aligned. */
1752 if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
1753 MGET(n, M_TRYWAIT, MT_DATA);
/* Large payloads get a cluster so the copy fits in one mbuf. */
1754 if (m->m_len >= MINCLSIZE) {
1755 MCLGET(n, M_TRYWAIT);
1764 * If n is non-NULL, loop on m copying data, then replace the
1765 * portion of the chain that had to be realigned.
1769 m_copyback(n, off, m->m_len, mtod(m, caddr_t));
1777 RPCDEBUG("leave rpcclnt_realign()");
/* Print a console/terminal message about an RPC server, e.g. "not
 * responding". Two OS-specific tprintf paths follow (#ifdef elided).
 * NOTE(review): the two paths disagree on the prefix — "rpc server" vs
 * "nfs server"; these are runtime strings, flagged but left unchanged. */
1782 rpcclnt_msg(p, server, msg)
1789 struct proc *pr = p;
/* FreeBSD-style path: open a tprintf session for the thread's process. */
1792 tpr = tprintf_open(p);
1795 tprintf(tpr, "rpc server %s: %s\n", server, msg);
/* Alternate path: tprintf directly with the process (NULL if no thread). */
1801 tprintf(p ? p->td_proc : NULL, LOG_INFO,
1802 "nfs server %s: %s\n", server, msg);
1808 * Build the RPC header and fill in the authorization info. The authorization
1809 * string argument is only used when the credentials come from outside of the
1810 * kernel (AUTH_KERB). (likewise, the ucred is only used when inside the
1811 * kernel) Returns the head of the mbuf list.
1813 static struct mbuf *
1814 rpcclnt_buildheader(rc, procid, mrest, mrest_len, xidp, mheadend, cred)
1818 u_int32_t mrest_len;
1820 struct mbuf **mheadend;
1821 struct ucred * cred;
1823 /* register */ struct mbuf *mb;
1824 register u_int32_t *tl;
1825 /* register */ caddr_t bpos;
1826 struct mbuf *mreq, *mb2;
/* Grab a packet-header mbuf for the call header; upgrade to a cluster
 * or align within the header area depending on the fixed header size. */
1829 MGETHDR(mb, M_TRYWAIT, MT_DATA);
1830 if (6 * RPCX_UNSIGNED >= MINCLSIZE) {
1831 MCLGET(mb, M_TRYWAIT);
1832 } else if (6 * RPCX_UNSIGNED < MHLEN) {
1833 MH_ALIGN(mb, 6 * RPCX_UNSIGNED);
/* Neither a cluster nor the header area can hold the header: give up. */
1835 RPCDEBUG("mbuf too small");
1836 panic("cheap bailout");
1840 bpos = mtod(mb, caddr_t);
1843 * First the RPC header.
/* Reserve space for xid, direction, rpcvers, prog, vers, proc. */
1845 rpcm_build(tl, u_int32_t *, 6 * RPCX_UNSIGNED);
1847 /* Get a new (non-zero) xid */
/* Seed the global xid once with a random value, then advance it by a
 * small non-zero random increment for every request. */
1848 if ((rpcclnt_xid == 0) && (rpcclnt_xid_touched == 0)) {
1849 rpcclnt_xid = arc4random();
1850 rpcclnt_xid_touched = 1;
1852 while ((*xidp = arc4random() % 256) == 0);
1853 rpcclnt_xid += *xidp;
/* Store the xid both in the header and back through *xidp (XDR order). */
1857 *tl++ = *xidp = txdr_unsigned(rpcclnt_xid);
1861 *tl++ = txdr_unsigned(rc->rc_prog->prog_id);
1862 *tl++ = txdr_unsigned(rc->rc_prog->prog_version);
1863 *tl++ = txdr_unsigned(procid);
/* Append the credential + verifier section; failure aborts the build. */
1865 if ((error = rpcauth_buildheader(rc->rc_auth, cred, &mb, &bpos))) {
1867 RPCDEBUG("rpcauth_buildheader failed %d", error);
/* Finalize the packet header on the completed chain. */
1873 mreq->m_pkthdr.len = m_length(mreq, NULL);
1874 mreq->m_pkthdr.rcvif = NULL;
1879 * Help break down an mbuf chain by setting the first siz bytes contiguous
1880 * pointed to by returned val. This is used by the macros rpcm_dissect and
1881 * rpcm_dissecton for tough cases. (The macros use the vars. dpos and dpos2)
1884 rpcm_disct(mdp, dposp, siz, left, cp2)
1891 struct mbuf *mp, *mp2;
/* Nothing left in this mbuf: advance to the next one in the chain. */
1897 *mdp = mp = mp->m_next;
1899 RPC_RETURN(EBADRPC);
1901 *dposp = mtod(mp, caddr_t);
/* Requested span crosses mbufs and there is no next mbuf: bad RPC. */
1906 } else if (mp->m_next == NULL) {
1907 RPC_RETURN(EBADRPC);
/* The contiguous-copy path below can hold at most MHLEN bytes. */
1908 } else if (siz > MHLEN) {
1909 panic("rpc S too big");
/* Splice a fresh mbuf after mp to receive the bytes shifted out. */
1911 MGET(mp2, M_TRYWAIT, MT_DATA);
1912 mp2->m_next = mp->m_next;
/* Compact the partial data to the front of mp... */
1916 *cp2 = p = mtod(mp, caddr_t);
1917 bcopy(*dposp, p, left); /* Copy what was left */
1921 /* Loop around copying up the siz2 bytes */
/* ...then pull the remaining siz2 bytes forward from successor mbufs. */
1924 RPC_RETURN(EBADRPC);
1925 xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
1927 bcopy(mtod(mp2, caddr_t), p, xfer);
1938 *dposp = mtod(mp2, caddr_t);
/* Look up the timer class for a procedure in the client's rc_proct table;
 * the elided fall-through presumably returns 0 when the table is absent
 * or procid is out of range — TODO confirm. */
1946 rpcclnt_proct(rpc, procid)
1947 struct rpcclnt *rpc;
1950 if (rpc->rc_proctlen != 0 && rpc->rc_proct != NULL &&
1951 procid < rpc->rc_proctlen) {
1952 return rpc->rc_proct[procid];
/* Advance the mbuf-parse position (*mdp/*dposp) by offs bytes, walking to
 * later mbufs as needed; returns EBADRPC if the chain ends first. Most of
 * the body is elided in this view. */
1958 rpc_adv(mdp, dposp, offs, left)
1973 RPC_RETURN(EBADRPC);
1977 *dposp = mtod(m, caddr_t) + offs;
/* Soft-terminate every outstanding task belonging to this client, then
 * poll (up to 30 ticks) until none of its tasks remain on rpctask_q. */
1982 rpcclnt_cancelreqs(rpc)
1983 struct rpcclnt *rpc;
1985 struct rpctask *task;
/* Pass 1: flag all live, reply-less tasks of this client. */
1989 TAILQ_FOREACH(task, &rpctask_q, r_chain) {
1990 if (rpc != task->r_rpcclnt || task->r_mrep != NULL ||
1991 (task->r_flags & R_SOFTTERM))
1993 rpcclnt_softterm(task);
/* Pass 2: wait for the tasks to drain, sleeping on lbolt between scans. */
1997 for (i = 0; i < 30; i++) {
1999 TAILQ_FOREACH(task, &rpctask_q, r_chain) {
2000 if (rpc == task->r_rpcclnt)
2006 tsleep(&lbolt, PSOCK, "nfscancel", 0);
/* Mark a task soft-terminated; if it had been sent, return its slot to
 * the client's congestion window (rc_sent) and clear R_SENT. */
2012 rpcclnt_softterm(struct rpctask * task)
2014 task->r_flags |= R_SOFTTERM;
2015 if (task->r_flags & R_SENT) {
2016 task->r_rpcclnt->rc_sent -= RPC_CWNDSCALE;
2017 task->r_flags &= ~R_SENT;
2023 /* called by rpcclnt_get() */
/* Allocate a zeroed rpcclnt structure; M_WAITOK means this cannot fail. */
2025 rpcclnt_create(struct rpcclnt ** rpc)
2027 MALLOC(*rpc, struct rpcclnt *, sizeof(struct rpcclnt), M_RPC, M_WAITOK | M_ZERO);
2030 /* called by rpcclnt_put() */
/* Free an rpcclnt; the elided branch frees non-NULL pointers, and a NULL
 * argument only produces the debug message below. */
2032 rpcclnt_destroy(struct rpcclnt * rpc)
2037 RPCDEBUG("attempting to free a NULL rpcclnt (not dereferenced)");
2040 #endif /* !__OpenBSD__ */
2043 /* XXX: add a lock around the auth structure in struct rpcclnt and make this
2044 * call safe for calling during a connection */
2046 rpcauth_buildheader(struct rpc_auth * auth, struct ucred * cred, struct mbuf ** mhdr, caddr_t * bp)
2048 size_t authsiz, verfsiz;
2049 uint32_t mlen, grpsiz;
2050 register struct mbuf *mb, *mb2;
2052 register u_int32_t *tl;
2055 if (auth == NULL || mhdr == NULL)
2058 switch (auth->auth_type) {
2064 authsiz = (5 + cred->cr_ngroups) * RPCX_UNSIGNED;
2068 return EPROTONOSUPPORT;
2072 mlen = rpcm_rndup(authsiz) + rpcm_rndup(verfsiz) + 4 * RPCX_UNSIGNED;
2077 rpcm_build(tl, u_int32_t *, mlen);
2082 *tl++ = txdr_unsigned(auth->auth_type);
2083 *tl++ = txdr_unsigned(authsiz);
2084 switch (auth->auth_type) {
2089 *tl++ = txdr_unsigned(cred->cr_uid);
2090 *tl++ = txdr_unsigned(cred->cr_groups[0]);
2091 grpsiz = cred->cr_ngroups;
2092 *tl++ = txdr_unsigned(grpsiz);
2093 /* XXX: groups[0] is already sent... */
2094 for (i = 0 ; i < grpsiz ; i++) {
2095 *tl++ = txdr_unsigned(cred->cr_groups[i]);
2098 /* null verification header */
2099 *tl++ = txdr_unsigned(RPCAUTH_NULL);
2103 /* just a null verf header */
2104 *tl++ = txdr_unsigned(RPCAUTH_NULL);
2108 panic("inconsistent rpc auth type");