2 /* $Id: rpcclnt.c,v 1.9 2003/11/05 14:59:03 rees Exp $ */
6 * the regents of the university of michigan
9 * permission is granted to use, copy, create derivative works and redistribute
10 * this software and such derivative works for any purpose, so long as the name
11 * of the university of michigan is not used in any advertising or publicity
12 * pertaining to the use or distribution of this software without specific,
13 * written prior authorization. if the above copyright notice or any other
14 * identification of the university of michigan is included in any copy of any
15 * portion of this software, then the disclaimer below must also be included.
17 * this software is provided as is, without representation from the university
18 * of michigan as to its fitness for any purpose, and without warranty by the
19 * university of michigan of any kind, either express or implied, including
20 * without limitation the implied warranties of merchantability and fitness for
21 * a particular purpose. the regents of the university of michigan shall not be
22 * liable for any damages, including special, indirect, incidental, or
23 * consequential damages, with respect to any claim arising out of or in
24 * connection with the use of the software, even if it has been or is hereafter
25 * advised of the possibility of such damages.
29 * Copyright (c) 1989, 1991, 1993, 1995 The Regents of the University of
30 * California. All rights reserved.
32 * This code is derived from software contributed to Berkeley by Rick Macklem at
33 * The University of Guelph.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions are
37 * met: 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer. 2.
39 * Redistributions in binary form must reproduce the above copyright notice,
40 * this list of conditions and the following disclaimer in the documentation
41 * and/or other materials provided with the distribution. 3. All advertising
42 * materials mentioning features or use of this software must display the
43 * following acknowledgement: This product includes software developed by the
44 * University of California, Berkeley and its contributors. 4. Neither the
45 * name of the University nor the names of its contributors may be used to
46 * endorse or promote products derived from this software without specific
47 * prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
50 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
51 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
52 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
53 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
55 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
56 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
64 /* XXX: kill ugly debug strings */
65 /* XXX: get rid of proct, as it is not even being used... (or keep it so v{2,3}
66 * can run, but clean it up! */
68 #include <sys/param.h>
69 #include <sys/systm.h>
71 #include <sys/mount.h>
72 #include <sys/kernel.h>
74 #include <sys/syslog.h>
75 #include <sys/malloc.h>
78 #include <sys/signalvar.h>
79 #include <sys/sysent.h>
80 #include <sys/syscall.h>
81 #include <sys/sysctl.h>
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/mutex.h>
89 #include <netinet/in.h>
90 #include <netinet/tcp.h>
92 #include <nfs/rpcv2.h>
94 #include <rpc/rpcm_subs.h>
95 #include <rpc/rpcclnt.h>
/*
 * NOTE(review): this chunk is a partial extraction -- each line begins with
 * its original file line number and several original lines are missing
 * (e.g. the middle line of the RPC_RTO macro below).  Code is reproduced
 * byte-identical; only comments are added.
 */
97 /* memory management */
/* Allocation pools for task and client state (OpenBSD pool(9) path). */
99 struct pool rpctask_pool;
100 struct pool rpcclnt_pool;
/* Low / high watermarks applied to rpctask_pool at init time. */
101 #define RPCTASKPOOL_LWM 10
102 #define RPCTASKPOOL_HWM 40
/* malloc(9) type used on the non-pool (FreeBSD) path. */
104 static MALLOC_DEFINE(M_RPC, "rpcclnt", "rpc state");
/* Debug-trace return; X is evaluated twice, so pass side-effect-free values. */
107 #define RPC_RETURN(X) do { RPCDEBUG("returning %d", X); return X; }while(0)
110 * Estimate rto for an nfs rpc sent via. an unreliable datagram. Use the mean
111 * and mean deviation of rtt for the appropriate type of rpc for the frequent
112 * rpcs and a default for the others. The justification for doing "other"
113 * this way is that these rpcs happen so infrequently that timer est. would
114 * probably be stale. Also, since many of these rpcs are non-idempotent, a
115 * conservative timeout is desired. getattr, lookup - A+2D read, write -
116 * A+4D other - nm_timeo
/* NOTE(review): line 120 of the original macro (the conditional between the
 * two arms) is missing from this extraction. */
118 #define RPC_RTO(n, t) \
119 ((t) == 0 ? (n)->rc_timeo : \
121 (((((n)->rc_srtt[t-1] + 3) >> 2) + (n)->rc_sdrtt[t-1] + 1) >> 1) : \
122 ((((n)->rc_srtt[t-1] + 7) >> 3) + (n)->rc_sdrtt[t-1] + 1)))
/* Smoothed rtt / deviation accessors, indexed by procedure class;
 * continuation lines of both macros are missing from this extraction. */
124 #define RPC_SRTT(s,r) (r)->r_rpcclnt->rc_srtt[rpcclnt_proct((s),\
127 #define RPC_SDRTT(s,r) (r)->r_rpcclnt->rc_sdrtt[rpcclnt_proct((s),\
132 * There is a congestion window for outstanding rpcs maintained per mount
133 * point. The cwnd size is adjusted in roughly the way that: Van Jacobson,
134 * Congestion avoidance and Control, In "Proceedings of SIGCOMM '88". ACM,
135 * August 1988. describes for TCP. The cwnd size is chopped in half on a
136 * retransmit timeout and incremented by 1/cwnd when each rpc reply is
137 * received and a full cwnd of rpcs is in progress. (The sent count and cwnd
138 * are scaled for integer arith.) Variants of "slow start" were tried and
139 * were found to be too much of a performance hit (ave. rtt 3 times larger),
140 * I suspect due to the large rtt that nfs rpcs have.
/* Fixed-point scale for the congestion window arithmetic described above. */
142 #define RPC_CWNDSCALE 256
143 #define RPC_MAXCWND (RPC_CWNDSCALE * 32)
/* Exponential backoff multipliers indexed by rc_timeouts - 1 (see timer). */
144 static const int rpcclnt_backoff[8] = {2, 4, 8, 16, 32, 64, 128, 256,};
146 /* XXX ugly debug strings */
/*
 * Human-readable strings for RPC accept-state errors, indexed by the
 * accept_stat value from the reply header.
 */
147 #define RPC_ERRSTR_ACCEPTED_SIZE 6
148 char *rpc_errstr_accepted[RPC_ERRSTR_ACCEPTED_SIZE] = {
149 "", /* no good message... */
150 "remote server hasn't exported program.",
151 "remote server can't support version number.",
152 "program can't support procedure.",
153 "procedure can't decode params.",
154 "remote error. remote side memory allocation failure?"
/* Strings for the two reject_stat values (RPC_MISMATCH, RPC_AUTHERR). */
157 char *rpc_errstr_denied[2] = {
158 "remote server doesnt support rpc version 2!",
159 "remote server authentication error."
/*
 * Strings for auth_stat values.  NOTE(review): only 5 initializers are
 * visible against a declared size of 6 -- original line 164 is missing
 * from this extraction, so this is likely not a real mismatch.
 */
162 #define RPC_ERRSTR_AUTH_SIZE 6
163 char *rpc_errstr_auth[RPC_ERRSTR_AUTH_SIZE] = {
165 "auth error: bad credential (seal broken).",
166 "auth error: client must begin new session.",
167 "auth error: bad verifier (seal broken).",
168 "auth error: verifier expired or replayed.",
169 "auth error: rejected for security reasons.",
173 * Static data, mostly RPC constants in XDR form
/* Pre-byteswapped (XDR) constants filled in by the init routine below. */
175 static u_int32_t rpc_reply, rpc_call, rpc_vers;
178 * rpc_msgdenied, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted,
179 * rpc_autherr, rpc_auth_kerb;
/* Transaction-id generator; _touched flags whether it has been seeded. */
182 static u_int32_t rpcclnt_xid = 0;
183 static u_int32_t rpcclnt_xid_touched = 0;
/* Global counters exported read-only through the sysctls below. */
184 struct rpcstats rpcstats;
187 SYSCTL_NODE(_kern, OID_AUTO, rpc, CTLFLAG_RD, 0, "RPC Subsystem");
189 SYSCTL_UINT(_kern_rpc, OID_AUTO, retries, CTLFLAG_RD, &rpcstats.rpcretries, 0, "retries");
190 SYSCTL_UINT(_kern_rpc, OID_AUTO, request, CTLFLAG_RD, &rpcstats.rpcrequests, 0, "request");
191 SYSCTL_UINT(_kern_rpc, OID_AUTO, timeouts, CTLFLAG_RD, &rpcstats.rpctimeouts, 0, "timeouts");
192 SYSCTL_UINT(_kern_rpc, OID_AUTO, unexpected, CTLFLAG_RD, &rpcstats.rpcunexpected, 0, "unexpected");
193 SYSCTL_UINT(_kern_rpc, OID_AUTO, invalid, CTLFLAG_RD, &rpcstats.rpcinvalid, 0, "invalid");
198 SYSCTL_UINT(_kern_rpc, OID_AUTO, debug_on, CTLFLAG_RW, &rpcdebugon, 0, "RPC Debug messages");
202 * Queue head for rpctask's
/* Global list of outstanding requests, oldest first (timer scans it). */
205 TAILQ_HEAD(, rpctask) rpctask_q;
/* Retransmit timer handle (FreeBSD callout path). */
206 struct callout rpcclnt_callout;
/*
 * Forward declarations.  rpcclnt_send/rpcclnt_receive/rpcclnt_realign each
 * appear twice: the mbuf-address variants vs. the struct sockaddr variants
 * were presumably separated by #ifdef (OpenBSD vs. FreeBSD) in the original
 * file -- the preprocessor lines are missing from this extraction.
 */
209 static int rpcclnt_send(struct socket *, struct mbuf *, struct mbuf *, struct rpctask *);
210 static int rpcclnt_receive(struct rpctask *, struct mbuf **, struct mbuf **, RPC_EXEC_CTX);
212 static int rpcclnt_send(struct socket *, struct sockaddr *, struct mbuf *, struct rpctask *);
213 static int rpcclnt_receive(struct rpctask *, struct sockaddr **, struct mbuf **, RPC_EXEC_CTX);
216 static int rpcclnt_msg(RPC_EXEC_CTX, const char *, char *);
218 static int rpcclnt_reply(struct rpctask *, RPC_EXEC_CTX);
219 static void rpcclnt_timer(void *);
/* Send/receive serialization locks keyed on the rc_flag word. */
220 static int rpcclnt_sndlock(int *, struct rpctask *);
221 static void rpcclnt_sndunlock(int *);
222 static int rpcclnt_rcvlock(struct rpctask *);
223 static void rpcclnt_rcvunlock(int *);
225 void rpcclnt_realign(struct mbuf *, int);
227 static void rpcclnt_realign(struct mbuf **, int);
/* RPC header construction / XDR stream helpers. */
230 static struct mbuf *rpcclnt_buildheader(struct rpcclnt *, int, struct mbuf *, u_int32_t, int *, struct mbuf **, struct ucred *);
231 static int rpcm_disct(struct mbuf **, caddr_t *, int, int, caddr_t *);
232 static u_int32_t rpcclnt_proct(struct rpcclnt *, u_int32_t);
233 static int rpc_adv(struct mbuf **, caddr_t *, int, int);
234 static void rpcclnt_softterm(struct rpctask * task);
236 static int rpcauth_buildheader(struct rpc_auth * auth, struct ucred *, struct mbuf **, caddr_t *);
/*
 * One-time subsystem initialization: tick interval, stats counters, XDR
 * constants, task queue, pools and the retransmit timer.
 * NOTE(review): the function signature (rpcclnt_init, presumably) and a
 * number of body lines are missing from this extraction.
 */
242 static struct timeout rpcclnt_timer_to;
/* Convert RPC_TICKINTVL (ms) to clock ticks, rounding, minimum one tick. */
245 rpcclnt_ticks = (hz * RPC_TICKINTVL + 500) / 1000;
246 if (rpcclnt_ticks < 1)
248 rpcstats.rpcretries = 0;
249 rpcstats.rpcrequests = 0;
250 rpcstats.rpctimeouts = 0;
251 rpcstats.rpcunexpected = 0;
252 rpcstats.rpcinvalid = 0;
255 * rpc constants how about actually using more than one of these!
/* Pre-byteswap the wire constants once so the hot paths avoid htonl(). */
258 rpc_reply = txdr_unsigned(RPC_REPLY);
259 rpc_vers = txdr_unsigned(RPC_VER2);
260 rpc_call = txdr_unsigned(RPC_CALL);
262 rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
263 rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
264 rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
265 rpc_autherr = txdr_unsigned(RPC_AUTHERR);
266 rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
267 rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
270 /* initialize rpctask queue */
271 TAILQ_INIT(&rpctask_q);
274 /* initialize pools */
275 pool_init(&rpctask_pool, sizeof(struct rpctask), 0, 0, RPCTASKPOOL_LWM,
277 pool_setlowat(&rpctask_pool, RPCTASKPOOL_LWM);
278 pool_sethiwat(&rpctask_pool, RPCTASKPOOL_HWM);
280 pool_init(&rpcclnt_pool, sizeof(struct rpcclnt), 0, 0, 1, "rpcclnt_p", NULL);
282 /* initialize timers */
/* OpenBSD path: arm timeout(9) and kick the timer once immediately. */
283 timeout_set(&rpcclnt_timer_to, rpcclnt_timer, &rpcclnt_timer_to);
284 rpcclnt_timer(&rpcclnt_timer_to);
285 #else /* !__OpenBSD__ */
286 callout_init(&rpcclnt_callout, 0);
287 #endif /* !__OpenBSD__ */
289 RPCDEBUG("rpc initialed");
/* Subsystem teardown fragment (signature missing from this extraction):
 * stops the retransmit callout; sysctl removal is left as an XXX. */
298 /* XXX delete sysctl variables? */
299 callout_stop(&rpcclnt_callout);
/*
 * Record client configuration (program, server address, socket type/proto,
 * auth, transfer sizes, flags) in the rpcclnt structure.  Does not open a
 * socket -- see rpcclnt_connect().  K&R-style definition; several parameter
 * declarations and the return statements are missing from this extraction.
 * The sockaddr is referenced, not copied (clnt->rc_name = addr).
 */
303 rpcclnt_setup(clnt, program, addr, sotype, soproto, auth, max_read_size, max_write_size, flags)
304 struct rpcclnt * clnt;
305 struct rpc_program * program;
306 struct sockaddr * addr;
309 struct rpc_auth * auth;
/* Validate required arguments before touching the client structure. */
314 if (clnt == NULL || program == NULL || addr == NULL || auth == NULL)
317 if (program->prog_name == NULL)
319 clnt->rc_prog = program;
321 clnt->rc_name = addr;
322 clnt->rc_sotype = sotype;
323 clnt->rc_soproto = soproto;
324 clnt->rc_auth = auth;
325 clnt->rc_rsize = max_read_size;
326 clnt->rc_wsize = max_write_size;
327 clnt->rc_flag = flags;
/* Procedure-timing table starts empty; set elsewhere if used. */
329 clnt->rc_proctlen = 0;
330 clnt->rc_proct = NULL;
336 * Initialize sockets and congestion for a new RPC connection. We do not free
337 * the sockaddr if error.
/*
 * NOTE(review): body below interleaves what were presumably #ifdef'd
 * OpenBSD (mbuf-option/sosetopt(level,name,mbuf)) and FreeBSD
 * (struct sockopt/sosetopt(so,&opt)) code paths; the preprocessor lines
 * and many statements are missing from this extraction.
 */
340 rpcclnt_connect(rpc, td)
345 int s, error, rcvreserve, sndreserve;
346 struct sockaddr *saddr;
349 struct sockaddr_in *sin;
352 struct sockaddr_in sin;
359 RPCDEBUG("no rpcclnt struct!\n");
363 GIANT_REQUIRED; /* XXX until socket locking done */
365 /* create the socket */
368 saddr = rpc->rc_name;
370 error = socreate(saddr->sa_family, &rpc->rc_so, rpc->rc_sotype,
371 rpc->rc_soproto, td->td_ucred, td);
374 RPCDEBUG("error %d in socreate()", error);
/* Cache the protocol flags; consulted later for PR_CONNREQUIRED etc. */
378 rpc->rc_soflags = so->so_proto->pr_flags;
381 * Some servers require that the client port be a reserved port
382 * number. We always allocate a reserved port, as this prevents
383 * filehandle disclosure through UDP port capture.
385 if (saddr->sa_family == AF_INET) {
/* Request a low (reserved) local port before binding. */
392 MGET(mopt, M_TRYWAIT, MT_SOOPTS);
393 mopt->m_len = sizeof(int);
394 ip = mtod(mopt, int *);
395 *ip = IP_PORTRANGE_LOW;
397 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
399 soarg = IP_PORTRANGE_LOW;
400 bzero(&opt, sizeof(struct sockopt));
401 opt.sopt_dir = SOPT_SET;
402 opt.sopt_level = IPPROTO_IP;
403 opt.sopt_name = IP_PORTRANGE;
404 opt.sopt_val = &soarg;
405 opt.sopt_valsize = sizeof(soarg);
407 error = sosetopt(so, &opt);
/* Bind to INADDR_ANY, port 0: kernel picks a port in the low range. */
413 MGET(m, M_TRYWAIT, MT_SONAME);
414 sin = mtod(m, struct sockaddr_in *);
415 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
416 sin->sin_family = AF_INET;
417 sin->sin_addr.s_addr = INADDR_ANY;
418 sin->sin_port = htons(0);
419 error = sobind(so, m);
422 sin.sin_len = sizeof(struct sockaddr_in);
423 sin.sin_family = AF_INET;
424 sin.sin_addr.s_addr = INADDR_ANY;
425 sin.sin_port = htons(0);
427 * &thread0 gives us root credentials to ensure sobind
428 * will give us a reserved ephemeral port.
430 error = sobind(so, (struct sockaddr *) & sin, &thread0);
/* Restore the default port range so later binds are unaffected. */
436 MGET(mopt, M_TRYWAIT, MT_SOOPTS);
437 mopt->m_len = sizeof(int);
438 ip = mtod(mopt, int *);
439 *ip = IP_PORTRANGE_DEFAULT;
440 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
442 soarg = IP_PORTRANGE_DEFAULT;
443 bzero(&opt, sizeof(struct sockopt));
444 opt.sopt_dir = SOPT_SET;
445 opt.sopt_level = IPPROTO_IP;
446 opt.sopt_name = IP_PORTRANGE;
447 opt.sopt_val = &soarg;
448 opt.sopt_valsize = sizeof(soarg);
449 error = sosetopt(so, &opt);
455 * Protocols that do not require connections may be optionally left
456 * unconnected for servers that reply from a port other than
459 if (rpc->rc_flag & RPCCLNT_NOCONN) {
460 if (rpc->rc_soflags & PR_CONNREQUIRED) {
465 error = soconnect(so, saddr, td);
470 * Wait for the connection to complete. Cribbed from the
471 * connect system call but with the wait timing out so that
472 * interruptible mounts don't hang here for a long time.
479 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
480 (void)tsleep((caddr_t) & so->so_timeo, PSOCK,
484 * XXX needs to catch interrupt signals. something
485 * like this: if ((so->so_state & SS_ISCONNECTING) &&
486 * so->so_error == 0 && rep && (error =
487 * nfs_sigintr(nmp, rep, rep->r_td)) != 0) {
488 * so->so_state &= ~SS_ISCONNECTING; splx(s); goto
493 error = so->so_error;
/* Soft/interruptible mounts get 5s socket timeouts; hard mounts block. */
500 if (rpc->rc_flag & (RPCCLNT_SOFT | RPCCLNT_INT)) {
501 so->so_rcv.sb_timeo = (5 * hz);
502 so->so_snd.sb_timeo = (5 * hz);
504 so->so_rcv.sb_timeo = 0;
505 so->so_snd.sb_timeo = 0;
/* Size socket buffers from the configured transfer sizes per socket type. */
509 if (rpc->rc_sotype == SOCK_DGRAM) {
510 sndreserve = rpc->rc_wsize + RPC_MAXPKTHDR;
511 rcvreserve = rpc->rc_rsize + RPC_MAXPKTHDR;
512 } else if (rpc->rc_sotype == SOCK_SEQPACKET) {
513 sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR) * 2;
514 rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR) * 2;
516 if (rpc->rc_sotype != SOCK_STREAM)
517 panic("rpcclnt_connect() bad sotype");
518 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
520 MGET(m, M_TRYWAIT, MT_SOOPTS);
521 *mtod(m, int32_t *) = 1;
522 m->m_len = sizeof(int32_t);
523 sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
527 bzero(&opt, sizeof(struct sockopt));
528 opt.sopt_dir = SOPT_SET;
529 opt.sopt_level = SOL_SOCKET;
530 opt.sopt_name = SO_KEEPALIVE;
531 opt.sopt_val = &soarg;
532 opt.sopt_valsize = sizeof(soarg);
/* Disable Nagle on TCP so small RPCs are not delayed. */
536 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
538 MGET(m, M_TRYWAIT, MT_SOOPTS);
539 *mtod(m, int32_t *) = 1;
540 m->m_len = sizeof(int32_t);
541 sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
545 bzero(&opt, sizeof(struct sockopt));
546 opt.sopt_dir = SOPT_SET;
547 opt.sopt_level = IPPROTO_TCP;
548 opt.sopt_name = TCP_NODELAY;
549 opt.sopt_val = &soarg;
550 opt.sopt_valsize = sizeof(soarg);
/* Stream sockets also carry the 4-byte RPC record mark per message. */
554 sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR +
555 sizeof(u_int32_t)) * 2;
556 rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR +
557 sizeof(u_int32_t)) * 2;
559 error = soreserve(so, sndreserve, rcvreserve);
562 so->so_rcv.sb_flags |= SB_NOINTR;
563 so->so_snd.sb_flags |= SB_NOINTR;
565 /* Initialize other non-zero congestion variables */
566 rpc->rc_srtt[0] = rpc->rc_srtt[1] = rpc->rc_srtt[2] =
567 rpc->rc_srtt[3] = (RPC_TIMEO << 3);
568 rpc->rc_sdrtt[0] = rpc->rc_sdrtt[1] = rpc->rc_sdrtt[2] =
569 rpc->rc_sdrtt[3] = 0;
570 rpc->rc_cwnd = RPC_MAXCWND / 2; /* Initial send window */
572 rpc->rc_timeouts = 0;
/* Error path: tear down the partially-built socket (sockaddr kept). */
576 rpcclnt_disconnect(rpc);
583 * Called when a connection is broken on a reliable protocol.
584 * - clean up the old socket
585 * - rpcclnt_connect() again
586 * - set R_MUSTRESEND for all outstanding requests on mount point
587 * If this fails the mount point is DEAD!
588 * nb: Must be called with the rpcclnt_sndlock() set on the mount point.
591 rpcclnt_reconnect(rep, td)
596 struct rpcclnt *rpc = rep->r_rpcclnt;
/* Retry the connect until it succeeds or the caller is interrupted;
 * sleep on lbolt (~1s) between attempts. */
599 rpcclnt_disconnect(rpc);
600 while ((error = rpcclnt_connect(rpc, td)) != 0) {
601 if (error == EINTR || error == ERESTART)
603 tsleep(&lbolt, PSOCK, "rpccon", 0);
607 * Loop through outstanding request list and fix up all requests on
/* Flag every task belonging to this client for retransmission. */
610 for (rp = TAILQ_FIRST(&rpctask_q); rp != NULL;
611 rp = TAILQ_NEXT(rp, r_chain)) {
612 if (rp->r_rpcclnt == rpc)
613 rp->r_flags |= R_MUSTRESEND;
619 * RPC transport disconnect. Clean up and unlink.
/* Body lines beyond the Giant assertion are missing from this extraction. */
622 rpcclnt_disconnect(rpc)
627 GIANT_REQUIRED; /* XXX until socket locking done */
/*
 * Disconnect under the receive lock: a zeroed dummy task (tied to this
 * client) is used to take rcvlock so no real receiver races the teardown.
 */
638 rpcclnt_safedisconnect(struct rpcclnt * rpc)
640 struct rpctask dummytask;
642 bzero(&dummytask, sizeof(dummytask));
643 dummytask.r_rpcclnt = rpc;
644 rpcclnt_rcvlock(&dummytask);
645 rpcclnt_disconnect(rpc);
646 rpcclnt_rcvunlock(&rpc->rc_flag);
650 * This is the rpc send routine. For connection based socket types, it
651 * must be called with an rpcclnt_sndlock() on the socket.
652 * "rep == NULL" indicates that it has been called from a server.
653 * For the client side:
654 * - return EINTR if the RPC is terminated, 0 otherwise
655 * - set R_MUSTRESEND if the send fails for any reason
656 * - do any cleanup required by recoverable socket errors (?)
657 * For the server side:
658 * - return EINTR or ERESTART if interrupted by a signal
659 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
660 * - do any cleanup required by recoverable socket errors (?)
663 rpcclnt_send(so, nam, top, rep)
/* sendnam is an mbuf on one build and a sockaddr on the other -- the
 * separating #ifdef lines are missing from this extraction. */
668 struct sockaddr *nam;
674 struct mbuf *sendnam;
676 struct sockaddr *sendnam;
677 struct thread *td = curthread;
679 int error, soflags, flags;
681 GIANT_REQUIRED; /* XXX until socket locking done */
/* Client side: bail if already soft-terminated; no socket means the
 * caller must retransmit later. */
684 if (rep->r_flags & R_SOFTTERM) {
688 if ((so = rep->r_rpcclnt->rc_so) == NULL) {
689 rep->r_flags |= R_MUSTRESEND;
693 rep->r_flags &= ~R_MUSTRESEND;
694 soflags = rep->r_rpcclnt->rc_soflags;
696 soflags = so->so_proto->pr_flags;
/* Connected (or connection-required) sockets pass no destination addr. */
698 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
703 if (so->so_type == SOCK_SEQPACKET)
708 error = sosend(so, sendnam, NULL, top, NULL, flags, td);
712 log(LOG_INFO, "rpc send error %d for service %s\n", error,
713 rep->r_rpcclnt->rc_prog->prog_name);
715 * Deal with errors for the client side.
/* On failure mark for retransmit (unless terminated). */
717 if (rep->r_flags & R_SOFTTERM)
720 rep->r_flags |= R_MUSTRESEND;
722 log(LOG_INFO, "rpc service send error %d\n", error);
725 * Handle any recoverable (soft) socket errors here.
727 if (error != EINTR && error != ERESTART &&
728 error != EWOULDBLOCK && error != EPIPE)
735 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all done by
736 * soreceive(), but for SOCK_STREAM we must deal with the Record Mark and
737 * consolidate the data into a new mbuf list. nb: Sometimes TCP passes the
738 * data up to soreceive() in long lists of small mbufs. For SOCK_STREAM we
739 * must be very careful to read an entire record once we have read any of it,
740 * even if the system call has been interrupted.
743 rpcclnt_receive(rep, aname, mp, td)
/* aname/getnam duality: mbuf vs. sockaddr variants; the #ifdefs separating
 * them are missing from this extraction. */
748 struct sockaddr **aname;
757 struct mbuf *control;
760 struct mbuf **getnam;
762 struct sockaddr **getnam;
764 int error, sotype, rcvflg;
766 GIANT_REQUIRED; /* XXX until socket locking done */
769 * Set up arguments for soreceive()
773 sotype = rep->r_rpcclnt->rc_sotype;
776 * For reliable protocols, lock against other senders/receivers in
777 * case a reconnect is necessary. For SOCK_STREAM, first get the
778 * Record Mark to find out how much more there is to get. We must
779 * lock the socket against other receivers until we have an entire
782 if (sotype != SOCK_DGRAM) {
783 error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
788 * Check for fatal errors and resending request.
791 * Ugh: If a reconnect attempt just happened, rc_so would
792 * have changed. NULL indicates a failed attempt that has
793 * essentially shut down this mount point.
795 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
796 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
799 so = rep->r_rpcclnt->rc_so;
801 error = rpcclnt_reconnect(rep, td);
803 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
/* Retransmit a copy of the request while R_MUSTRESEND is set; a failed
 * send triggers another reconnect attempt. */
808 while (rep->r_flags & R_MUSTRESEND) {
809 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_TRYWAIT);
810 rpcstats.rpcretries++;
811 error = rpcclnt_send(so, rep->r_rpcclnt->rc_name, m, rep);
813 if (error == EINTR || error == ERESTART ||
814 (error = rpcclnt_reconnect(rep, td)) != 0) {
815 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
821 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
/* SOCK_STREAM: first read the 4-byte record mark with MSG_WAITALL... */
822 if (sotype == SOCK_STREAM) {
823 aio.iov_base = (caddr_t) & len;
824 aio.iov_len = sizeof(u_int32_t);
827 auio.uio_segflg = UIO_SYSSPACE;
828 auio.uio_rw = UIO_READ;
830 auio.uio_resid = sizeof(u_int32_t);
837 rcvflg = MSG_WAITALL;
838 error = soreceive(so, NULL, &auio, NULL, NULL, &rcvflg);
839 if (error == EWOULDBLOCK && rep) {
840 if (rep->r_flags & R_SOFTTERM)
843 } while (error == EWOULDBLOCK);
844 if (!error && auio.uio_resid > 0) {
846 "short receive (%zu/%zu) from rpc server %s\n",
847 sizeof(u_int32_t) - auio.uio_resid,
849 rep->r_rpcclnt->rc_prog->prog_name);
/* Strip the last-fragment bit to get the record length. */
854 len = ntohl(len) & ~0x80000000;
856 * This is SERIOUS! We are out of sync with the
857 * sender and forcing a disconnect/reconnect is all I
860 if (len > RPC_MAXPACKET) {
861 log(LOG_ERR, "%s (%d) from rpc server %s\n",
862 "impossible packet length",
864 rep->r_rpcclnt->rc_prog->prog_name);
/* ...then read exactly len bytes of record body. */
868 auio.uio_resid = len;
870 rcvflg = MSG_WAITALL;
871 error = soreceive(so, NULL, &auio, mp, NULL, &rcvflg);
872 } while (error == EWOULDBLOCK || error == EINTR ||
874 if (!error && auio.uio_resid > 0) {
876 "short receive (%d/%d) from rpc server %s\n",
877 len - auio.uio_resid, len,
878 rep->r_rpcclnt->rc_prog->prog_name);
883 * NB: Since uio_resid is big, MSG_WAITALL is ignored
884 * and soreceive() will return when it has either a
885 * control msg or a data msg. We have no use for
886 * control msg., but must grab them and then throw
887 * them away so we know what is going on.
889 auio.uio_resid = len = 100000000; /* Anything Big */
897 error = soreceive(so, NULL, &auio, mp, &control, &rcvflg);
900 if (error == EWOULDBLOCK && rep) {
901 if (rep->r_flags & R_SOFTTERM)
904 } while (error == EWOULDBLOCK ||
905 (!error && *mp == NULL && control));
906 if ((rcvflg & MSG_EOR) == 0)
908 if (!error && *mp == NULL)
910 len -= auio.uio_resid;
/* Fatal receive error on a reliable socket: drop the data and try to
 * reconnect (under sndlock). */
913 if (error && error != EINTR && error != ERESTART) {
915 *mp = (struct mbuf *) 0;
918 "receive error %d from rpc server %s\n",
920 rep->r_rpcclnt->rc_prog->prog_name);
921 error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
923 error = rpcclnt_reconnect(rep, td);
/* SOCK_DGRAM path: plain soreceive with sender address returned. */
928 if ((so = rep->r_rpcclnt->rc_so) == NULL)
930 if (so->so_state & SS_ISCONNECTED)
934 auio.uio_resid = len = 1000000;
943 error = soreceive(so, getnam, &auio, mp, NULL, &rcvflg);
944 RPCDEBUG("soreceivce returns %d", error);
945 if (error == EWOULDBLOCK && (rep->r_flags & R_SOFTTERM)) {
946 RPCDEBUG("wouldblock && softerm -> EINTR");
949 } while (error == EWOULDBLOCK);
950 len -= auio.uio_resid;
957 * Search for any mbufs that are not a multiple of 4 bytes
958 * long or with m_data not longword aligned. These could
959 * cause pointer alignment problems, so copy them to well
962 rpcclnt_realign(mp, 5 * RPCX_UNSIGNED);
969 * Implement receipt of reply on a socket. We must search through the list of
970 * received datagrams matching them with outstanding requests using the xid,
971 * until ours is found.
975 rpcclnt_reply(myrep, td)
976 struct rpctask *myrep;
980 struct rpcclnt *rpc = myrep->r_rpcclnt;
982 struct mbuf *mrep, *md;
986 struct sockaddr *nam;
993 * Loop around until we get our own reply
997 * Lock against other receivers so that I don't get stuck in
998 * sbwait() after someone else has received my reply for me.
999 * Also necessary for connection based protocols to avoid
1000 * race conditions during a reconnect.
1002 error = rpcclnt_rcvlock(myrep);
1005 /* Already received, bye bye */
1006 if (myrep->r_mrep != NULL) {
1007 rpcclnt_rcvunlock(&rpc->rc_flag);
1011 * Get the next Rpc reply off the socket
1013 error = rpcclnt_receive(myrep, &nam, &mrep, td);
1015 rpcclnt_rcvunlock(&rpc->rc_flag);
1019 * Ignore routing errors on connectionless
1022 if (RPCIGNORE_SOERROR(rpc->rc_soflags, error)) {
1023 rpc->rc_so->so_error = 0;
1024 if (myrep->r_flags & R_GETONEREP)
1026 RPCDEBUG("ingoring routing error on connectionless protocol.");
/* Sender address is not needed once the datagram is in hand. */
1036 FREE(nam, M_SONAME);
1040 * Get the xid and check that it is an rpc reply
1043 dpos = mtod(md, caddr_t);
1044 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1046 if (*tl != rpc_reply) {
1047 rpcstats.rpcinvalid++;
1050 if (myrep->r_flags & R_GETONEREP)
1055 * Loop through the request list to match up the reply Iff no
1056 * match, just drop the datagram
1058 TAILQ_FOREACH(rep, &rpctask_q, r_chain) {
1059 if (rep->r_mrep == NULL && rxid == rep->r_xid) {
1066 * Update congestion window. Do the additive
1067 * increase of one rpc/rtt.
1069 if (rpc->rc_cwnd <= rpc->rc_sent) {
1071 (RPC_CWNDSCALE * RPC_CWNDSCALE +
1072 (rpc->rc_cwnd >> 1)) / rpc->rc_cwnd;
1073 if (rpc->rc_cwnd > RPC_MAXCWND)
1074 rpc->rc_cwnd = RPC_MAXCWND;
1076 rep->r_flags &= ~R_SENT;
1077 rpc->rc_sent -= RPC_CWNDSCALE;
1079 * Update rtt using a gain of 0.125 on the
1080 * mean and a gain of 0.25 on the deviation.
1082 if (rep->r_flags & R_TIMING) {
1084 * Since the timer resolution of
1085 * NFS_HZ is so course, it can often
1086 * result in r_rtt == 0. Since r_rtt
1087 * == N means that the actual rtt is
1088 * between N+dt and N+2-dt ticks, add
/* EWMA update of smoothed rtt / deviation (fixed-point shifts). */
1091 t1 = rep->r_rtt + 1;
1092 t1 -= (RPC_SRTT(rpc, rep) >> 3);
1093 RPC_SRTT(rpc, rep) += t1;
1096 t1 -= (RPC_SDRTT(rpc, rep) >> 2);
1097 RPC_SDRTT(rpc, rep) += t1;
1099 rpc->rc_timeouts = 0;
1104 * If not matched to a request, drop it. If it's mine, get
1108 rpcstats.rpcunexpected++;
1109 RPCDEBUG("rpc reply not matched\n");
1111 } else if (rep == myrep) {
1112 if (rep->r_mrep == NULL)
1113 panic("rpcreply nil");
1116 if (myrep->r_flags & R_GETONEREP)
1121 /* XXX: ignores tryagain! */
1123 * code from nfs_request - goes something like this
1124 * - fill in task struct
1125 * - links task into list
1126 * - calls rpcclnt_send() for first transmit
1127 * - calls rpcclnt_reply() to get reply
1128 * - fills in reply (which should be initialized prior to
1129 * calling), which is valid when 0 is returned and is
1130 * NEVER freed in this function
1132 * nb: always frees the request header, but NEVER frees 'mrest'
1134 * rpcclnt_setauth() should be used before calling this. EAUTH is returned if
1135 * authentication fails.
1137 * note that reply->result_* are invalid unless reply->type ==
1138 * RPC_MSGACCEPTED and reply->status == RPC_SUCCESS and that reply->verf_*
1139 * are invalid unless reply->type == RPC_MSGACCEPTED
1142 rpcclnt_request(rpc, mrest, procnum, td, cred, reply)
1143 struct rpcclnt *rpc;
1148 struct rpc_reply *reply;
1150 struct mbuf *m, *mrep;
1151 struct rpctask *task;
1153 struct mbuf *md, *mheadend;
1155 int t1, s, error = 0, mrest_len;
/* Task allocation: pool(9) on one build, MALLOC on the other (the
 * separating #ifdef lines are missing from this extraction). */
1159 task = pool_get(&rpctask_pool, PR_WAITOK);
1161 MALLOC(task, struct rpctask *, sizeof(struct rpctask), M_RPC, (M_WAITOK | M_ZERO));
1164 task->r_rpcclnt = rpc;
1165 task->r_procnum = procnum;
1168 mrest_len = m_length(mrest, NULL);
1170 m = rpcclnt_buildheader(rpc, procnum, mrest, mrest_len, &xid, &mheadend,
1174 * For stream protocols, insert a Sun RPC Record Mark.
1176 if (rpc->rc_sotype == SOCK_STREAM) {
1177 M_PREPEND(m, RPCX_UNSIGNED, M_TRYWAIT);
/* Record mark: high bit = last fragment, low 31 bits = fragment length. */
1178 *mtod(m, u_int32_t *) = htonl(0x80000000 |
1179 (m->m_pkthdr.len - RPCX_UNSIGNED));
/* Soft mounts retry a bounded number of times; hard mounts effectively
 * retry forever (past the clip limit). */
1184 if (rpc->rc_flag & RPCCLNT_SOFT)
1185 task->r_retry = rpc->rc_retry;
1187 task->r_retry = RPC_MAXREXMIT + 1; /* past clip limit */
1188 task->r_rtt = task->r_rexmit = 0;
1190 if (rpcclnt_proct(rpc, procnum) > 0)
1191 task->r_flags = R_TIMING;
1194 task->r_mrep = NULL;
1197 * Do the client side RPC.
1199 rpcstats.rpcrequests++;
1202 * Chain request into list of outstanding requests. Be sure to put it
1203 * LAST so timer finds oldest requests first.
/* First task on the queue arms the retransmit timer. */
1206 if (TAILQ_EMPTY(&rpctask_q))
1207 callout_reset(&rpcclnt_callout, rpcclnt_ticks, rpcclnt_timer,
1209 TAILQ_INSERT_TAIL(&rpctask_q, task, r_chain)
1212 * If backing off another request or avoiding congestion, don't send
1213 * this one now but let timer do it. If not timing a request, do it
1216 if (rpc->rc_so && (rpc->rc_sotype != SOCK_DGRAM ||
1217 (rpc->rc_flag & RPCCLNT_DUMBTIMR) ||
1218 rpc->rc_sent < rpc->rc_cwnd)) {
1221 if (rpc->rc_soflags & PR_CONNREQUIRED)
1222 error = rpcclnt_sndlock(&rpc->rc_flag, task);
/* Send a copy; the original mbuf chain is kept for retransmits. */
1224 error = rpcclnt_send(rpc->rc_so, rpc->rc_name,
1225 m_copym(m, 0, M_COPYALL, M_TRYWAIT),
1227 if (rpc->rc_soflags & PR_CONNREQUIRED)
1228 rpcclnt_sndunlock(&rpc->rc_flag);
1230 if (!error && (task->r_flags & R_MUSTRESEND) == 0) {
1231 rpc->rc_sent += RPC_CWNDSCALE;
1232 task->r_flags |= R_SENT;
1240 * Wait for the reply from our send or the timer's.
1242 if (!error || error == EPIPE)
1243 error = rpcclnt_reply(task, td);
1246 * RPC done, unlink the request.
1249 TAILQ_REMOVE(&rpctask_q, task, r_chain);
/* Last task off the queue disarms the retransmit timer. */
1250 if (TAILQ_EMPTY(&rpctask_q))
1251 callout_stop(&rpcclnt_callout);
1255 * Decrement the outstanding request count.
1257 if (task->r_flags & R_SENT) {
1258 task->r_flags &= ~R_SENT; /* paranoia */
1259 rpc->rc_sent -= RPC_CWNDSCALE;
1262 * If there was a successful reply and a tprintf msg. tprintf a
1265 if (!error && (task->r_flags & R_TPRINTFMSG)) {
1267 rpcclnt_msg(task->r_td, rpc->rc_prog->prog_name,
1272 /* free request header (leaving mrest) */
1273 mheadend->m_next = NULL;
1274 m_freem(task->r_mreq);
1276 /* initialize reply */
1277 reply->mrep = task->r_mrep;
1278 reply->verf_md = NULL;
1279 reply->result_md = NULL;
1281 mrep = task->r_mrep;
1283 dpos = task->r_dpos;
1285 /* task structure is no longer needed */
1287 pool_put(&rpctask_pool, task);
1296 * break down the rpc header and check if ok
1299 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1300 reply->stat.type = fxdr_unsigned(u_int32_t, *tl);
1302 if (reply->stat.type == RPC_MSGDENIED) {
1303 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1304 reply->stat.status = fxdr_unsigned(u_int32_t, *tl);
1306 switch (reply->stat.status) {
1308 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1309 reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++);
1310 reply->stat.mismatch_info.high = fxdr_unsigned(u_int32_t, *tl);
1314 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1315 reply->stat.autherr = fxdr_unsigned(u_int32_t, *tl);
1323 } else if (reply->stat.type != RPC_MSGACCEPTED) {
/* Accepted: record the verifier position/type/size for the caller. */
1328 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1330 reply->verf_md = md;
1331 reply->verf_dpos = dpos;
1333 reply->verf_type = fxdr_unsigned(u_int32_t, *tl++);
1334 reply->verf_size = fxdr_unsigned(u_int32_t, *tl);
1336 if (reply->verf_size != 0)
1337 rpcm_adv(rpcm_rndup(reply->verf_size));
1339 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1340 reply->stat.status = fxdr_unsigned(u_int32_t, *tl);
1342 if (reply->stat.status == RPC_SUCCESS) {
/* If dpos ran off the end of this mbuf, the result starts in the next. */
1343 if ((uint32_t)(dpos - mtod(md, caddr_t)) >= md->m_len) {
1344 RPCDEBUG("where is the next mbuf?");
1345 RPCDEBUG("%d -> %d",
1346 (int)(dpos - mtod(md, caddr_t)), md->m_len);
1347 if (md->m_next == NULL) {
1351 reply->result_md = md->m_next;
1352 reply->result_dpos = mtod(reply->result_md,
1356 reply->result_md = md;
1357 reply->result_dpos = dpos;
1359 } else if (reply->stat.status == RPC_PROGMISMATCH) {
1360 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1361 reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++);
1362 reply->stat.mismatch_info.high = fxdr_unsigned(u_int32_t, *tl);
1366 error = EPROTONOSUPPORT;
1378 * Scan the rpctask list and retransmit any requests that have timed out.
1379 * To avoid retransmission attempts on STREAM sockets (in the future) make
1380 * sure to set the r_retry field to 0 (implies nm_retry == 0).
/* NOTE(review): the function signature and several interior lines (loop
 * continues/breaks, some assignments) are elided from this excerpt. */
1387 struct timeout *to = (struct timeout *) arg;
1389 struct rpctask *rep;
1392 struct rpcclnt *rpc;
1397 struct thread *td = curthread;
/* Giant is taken because rpcclnt_msg() reaches tprintf (see comment). */
1405 mtx_lock(&Giant); /* rpc_msg -> tprintf */
1406 TAILQ_FOREACH(rep, &rpctask_q, r_chain) {
1407 rpc = rep->r_rpcclnt;
/* Skip tasks that already have a reply or were soft-terminated. */
1408 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
/* A pending signal on an interruptible mount kills the task softly. */
1410 if (rpcclnt_sigintr(rpc, rep, rep->r_td)) {
1411 rep->r_flags |= R_SOFTTERM;
1414 if (rep->r_rtt >= 0) {
/* Fixed timeout for "dumb" timers; otherwise an RTT-based estimate. */
1416 if (rpc->rc_flag & RPCCLNT_DUMBTIMR)
1417 timeo = rpc->rc_timeo;
1419 timeo = RPC_RTO(rpc, rpcclnt_proct(rep->r_rpcclnt,
/* Back off the timeout using the rpcclnt_backoff[] multiplier table. */
1421 if (rpc->rc_timeouts > 0)
1422 timeo *= rpcclnt_backoff[rpc->rc_timeouts - 1];
1423 if (rep->r_rtt <= timeo)
1425 if (rpc->rc_timeouts < 8)
1429 * Check for server not responding
/* Warn (once per task) when retransmits exceed the dead-server threshold. */
1431 if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
1432 rep->r_rexmit > rpc->rc_deadthresh) {
1433 rpcclnt_msg(rep->r_td, rpc->rc_prog->prog_name,
1435 rep->r_flags |= R_TPRINTFMSG;
/* Retry budget exhausted: count the timeout and soft-terminate. */
1437 if (rep->r_rexmit >= rep->r_retry) { /* too many */
1438 rpcstats.rpctimeouts++;
1439 rep->r_flags |= R_SOFTTERM;
/* Non-datagram sockets only bump the rexmit counter here (capped). */
1442 if (rpc->rc_sotype != SOCK_DGRAM) {
1443 if (++rep->r_rexmit > RPC_MAXREXMIT)
1444 rep->r_rexmit = RPC_MAXREXMIT;
1447 if ((so = rpc->rc_so) == NULL)
1451 * If there is enough space and the window allows.. Resend it
1452 * Set r_rtt to -1 in case we fail to send it now.
/* Resend only if the socket buffer has room, the congestion window (or a
 * dumb timer / already-sent flag) allows it, and the mbuf copy succeeds. */
1455 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
1456 ((rpc->rc_flag & RPCCLNT_DUMBTIMR) ||
1457 (rep->r_flags & R_SENT) ||
1458 rpc->rc_sent < rpc->rc_cwnd) &&
1459 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) {
/* Connected socket: no destination address; otherwise pass rc_name. */
1460 if ((rpc->rc_flag & RPCCLNT_NOCONN) == 0)
1461 error = (*so->so_proto->pr_usrreqs->pru_send) (so, 0, m,
1464 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, rpc->rc_name, NULL, td);
1466 if (RPCIGNORE_SOERROR(rpc->rc_soflags, error))
1470 * Iff first send, start timing else turn
1471 * timing off, backoff timer and divide
1472 * congestion window by 2.
1474 if (rep->r_flags & R_SENT) {
1475 rep->r_flags &= ~R_TIMING;
1476 if (++rep->r_rexmit > RPC_MAXREXMIT)
1477 rep->r_rexmit = RPC_MAXREXMIT;
/* Clamp: the congestion window never drops below one scaled unit. */
1479 if (rpc->rc_cwnd < RPC_CWNDSCALE)
1480 rpc->rc_cwnd = RPC_CWNDSCALE;
1481 rpcstats.rpcretries++;
1483 rep->r_flags |= R_SENT;
1484 rpc->rc_sent += RPC_CWNDSCALE;
1490 mtx_unlock(&Giant); /* rpc_msg -> tprintf */
/* Re-arm the periodic timer: timeout(9) (OpenBSD) vs callout(9) (FreeBSD)
 * variants — presumably selected by #ifdef lines elided here. */
1494 timeout_add(rpcclnt_timer, to, rpcclnt_ticks);
1496 callout_reset(&rpcclnt_callout, rpcclnt_ticks, rpcclnt_timer, NULL);
1501 * Test for a termination condition pending on the process. This is used for
1502 * RPCCLNT_INT mounts.
/* NOTE(review): excerpt — the return type, third parameter declaration and
 * the return statements are elided from this view. */
1505 rpcclnt_sigintr(rpc, task, pr)
1506 struct rpcclnt *rpc;
1507 struct rpctask *task;
1517 /* XXX deal with forced unmounts */
/* A soft-terminated task counts as interrupted regardless of signals. */
1519 if (task && (task->r_flags & R_SOFTTERM))
/* Non-interruptible clients never report pending signals. */
1522 if (!(rpc->rc_flag & RPCCLNT_INT))
/* Variant A (old-style signal sets): any deliverable, un-ignored signal. */
1530 if (p && p->p_siglist &&
1531 (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
/* Variant B (sigset_t API): strip blocked and ignored signals; ps_mtx
 * protects ps_sigignore. */
1537 tmpset = p->p_siglist;
1538 SIGSETNAND(tmpset, pr->td_sigmask);
1539 mtx_lock(&p->p_sigacts->ps_mtx);
1540 SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
1541 mtx_unlock(&p->p_sigacts->ps_mtx);
1542 if (SIGNOTEMPTY(p->p_siglist) && RPCCLNTINT_SIGMASK(tmpset)) {
1552 * Lock a socket against others. Necessary for STREAM sockets to ensure you
1553 * get an entire rpc request/reply and also to avoid race conditions between
1554 * the processes with nfs requests in progress when a reconnect is necessary.
/* NOTE(review): excerpt — return type and some statements (including the
 * line that presumably sets slpflag = PCATCH) are elided. */
1557 rpcclnt_sndlock(flagp, task)
1559 struct rpctask *task;
1562 int slpflag = 0, slptimeo = 0;
/* Interruptible mounts sleep catchable so a signal can abort the wait. */
1565 if (task->r_rpcclnt->rc_flag & RPCCLNT_INT)
/* Spin on the send lock: note interest, sleep, and recheck for signals. */
1567 while (*flagp & RPCCLNT_SNDLOCK) {
1568 if (rpcclnt_sigintr(task->r_rpcclnt, task, p))
1570 *flagp |= RPCCLNT_WANTSND;
1571 (void)tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcsndlck",
/* After a catchable sleep the code presumably degrades to a timed,
 * non-catchable retry — the body of this branch is elided. */
1573 if (slpflag == PCATCH) {
1578 *flagp |= RPCCLNT_SNDLOCK;
1583 * Unlock the stream socket for others.
1586 rpcclnt_sndunlock(flagp)
/* Dropping a lock we do not hold is a programming error. */
1590 if ((*flagp & RPCCLNT_SNDLOCK) == 0)
1591 panic("rpc sndunlock");
1592 *flagp &= ~RPCCLNT_SNDLOCK;
/* Wake any thread parked in rpcclnt_sndlock() waiting for the lock. */
1593 if (*flagp & RPCCLNT_WANTSND) {
1594 *flagp &= ~RPCCLNT_WANTSND;
1595 wakeup((caddr_t) flagp);
/* Serialize receivers on the socket, mirroring rpcclnt_sndlock(). */
1600 rpcclnt_rcvlock(task)
1601 struct rpctask *task;
1603 int *flagp = &task->r_rpcclnt->rc_flag;
/* NOTE(review): slpflag has no initializer on this line; the assignment
 * presumably sits in elided lines (cf. sndlock's slpflag = 0) — verify. */
1604 int slpflag, slptimeo = 0;
1606 if (*flagp & RPCCLNT_INT)
/* Wait until no other thread holds the receive lock. */
1610 while (*flagp & RPCCLNT_RCVLOCK) {
1611 if (rpcclnt_sigintr(task->r_rpcclnt, task, task->r_td))
1613 *flagp |= RPCCLNT_WANTRCV;
1614 (void)tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcrcvlk",
/* Catchable-sleep fallback branch; its body is elided here. */
1616 if (slpflag == PCATCH) {
1621 *flagp |= RPCCLNT_RCVLOCK;
1626 * Unlock the stream socket for others.
1629 rpcclnt_rcvunlock(flagp)
/* NOTE(review): panic text says "nfs rcvunlock" while the send-side twin
 * says "rpc sndunlock" — inherited inconsistency from the NFS origin. */
1633 if ((*flagp & RPCCLNT_RCVLOCK) == 0)
1634 panic("nfs rcvunlock");
1635 *flagp &= ~RPCCLNT_RCVLOCK;
/* Wake any thread parked in rpcclnt_rcvlock(). */
1636 if (*flagp & RPCCLNT_WANTRCV) {
1637 *flagp &= ~RPCCLNT_WANTRCV;
1638 wakeup((caddr_t) flagp);
1644 * Check for badly aligned mbuf data areas and realign data in an mbuf list
1645 * by copying the data areas up, as required.
/* NOTE(review): legacy copy-up variant (presumably the !__FreeBSD__ side of
 * an #ifdef; a second implementation follows).  Signature line, several
 * assignments and loop heads are elided from this excerpt. */
1648 rpcclnt_realign(m, hsiz)
1653 int siz, mlen, olen;
1659 * This never happens for UDP, rarely happens for TCP but
1660 * frequently happens for iso transport.
/* Only act when length or data pointer is not 4-byte aligned. */
1662 if ((m->m_len & 0x3) || (mtod(m, long)&0x3)) {
1664 fcp = mtod(m, caddr_t);
1665 if ((long)fcp & 0x3) {
/* The packet header tags are dropped before m_data is rewound. */
1666 if (m->m_flags & M_PKTHDR)
1667 m_tag_delete_chain(m, NULL);
1668 m->m_flags &= ~M_PKTHDR;
/* Reset m_data to an aligned offset in the cluster or internal buffer. */
1669 if (m->m_flags & M_EXT)
1670 m->m_data = m->m_ext.ext_buf +
1671 ((m->m_ext.ext_size - olen) & ~0x3);
1673 m->m_data = m->m_dat;
1676 tcp = mtod(m, caddr_t);
1681 * If possible, only put the first invariant part of
1682 * the RPC header in the first mbuf.
1684 mlen = M_TRAILINGSPACE(m);
1685 if (olen <= hsiz && mlen > hsiz)
1688 /* Loop through the mbuf list consolidating data. */
1692 if (m2->m_flags & M_PKTHDR)
1693 m_tag_delete_chain(m2, NULL);
1694 m2->m_flags &= ~M_PKTHDR;
1695 if (m2->m_flags & M_EXT)
1696 m2->m_data = m2->m_ext.ext_buf;
1698 m2->m_data = m2->m_dat;
1700 mlen = M_TRAILINGSPACE(m2);
1701 tcp = mtod(m2, caddr_t);
/* Copy as much as fits (or as much as remains) into the current mbuf. */
1705 siz = min(mlen, olen);
1707 bcopy(fcp, tcp, siz);
1717 fcp = mtod(m, caddr_t);
1722 * Finally, set m_len == 0 for any trailing mbufs
1723 * that have been copied out of.
/*
 * Newer realign variant: walk the chain and, on finding a misaligned mbuf,
 * rebuild the remainder into a fresh (cluster-backed when large) mbuf via
 * m_copyback, then splice it in.  Several interior lines are elided.
 */
1736 rpcclnt_realign(struct mbuf **pm, int hsiz)
1739 struct mbuf *n = NULL;
1742 RPCDEBUG("in rpcclnt_realign()");
1744 while ((m = *pm) != NULL) {
/* Misaligned length or data pointer triggers the copy path. */
1745 if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
1746 MGET(n, M_TRYWAIT, MT_DATA);
/* Large payloads get a cluster so m_copyback need not chain-extend. */
1747 if (m->m_len >= MINCLSIZE) {
1748 MCLGET(n, M_TRYWAIT);
1757 * If n is non-NULL, loop on m copying data, then replace the
1758 * portion of the chain that had to be realigned.
1762 m_copyback(n, off, m->m_len, mtod(m, caddr_t));
1770 RPCDEBUG("leave rpcclnt_realign()");
/*
 * Report an RPC server condition to the owning process's terminal.
 * Two platform variants are visible: tprintf_open/tprintf (OpenBSD-style)
 * and the one-shot tprintf (FreeBSD-style); the #ifdef lines are elided.
 * NOTE(review): the variants disagree on the message prefix ("rpc server"
 * vs "nfs server") — inherited inconsistency, runtime strings left as-is.
 */
1775 rpcclnt_msg(p, server, msg)
1782 struct proc *pr = p;
1785 tpr = tprintf_open(p);
1788 tprintf(tpr, "rpc server %s: %s\n", server, msg);
1794 tprintf(p ? p->td_proc : NULL, LOG_INFO,
1795 "nfs server %s: %s\n", server, msg);
1801 * Build the RPC header and fill in the authorization info. The authorization
1802 * string argument is only used when the credentials come from outside of the
1803 * kernel (AUTH_KERB). (likewise, the ucred is only used when inside the
1804 * kernel) Returns the head of the mbuf list.
/* NOTE(review): excerpt — some parameter declarations, error-path lines and
 * the final return are elided from this view. */
1806 static struct mbuf *
1807 rpcclnt_buildheader(rc, procid, mrest, mrest_len, xidp, mheadend, cred)
1811 u_int32_t mrest_len;
1813 struct mbuf **mheadend;
1814 struct ucred * cred;
1816 /* register */ struct mbuf *mb;
1817 register u_int32_t *tl;
1818 /* register */ caddr_t bpos;
1819 struct mbuf *mreq, *mb2;
/* Get an mbuf (cluster-backed if the fixed header would not fit) and
 * align it for the 6-word RPC call header. */
1822 MGETHDR(mb, M_TRYWAIT, MT_DATA);
1823 if (6 * RPCX_UNSIGNED >= MINCLSIZE) {
1824 MCLGET(mb, M_TRYWAIT);
1825 } else if (6 * RPCX_UNSIGNED < MHLEN) {
1826 MH_ALIGN(mb, 6 * RPCX_UNSIGNED);
1828 RPCDEBUG("mbuf too small");
1829 panic("cheap bailout");
1833 bpos = mtod(mb, caddr_t);
1836 * First the RPC header.
1838 rpcm_build(tl, u_int32_t *, 6 * RPCX_UNSIGNED);
1840 /* Get a new (non-zero) xid */
/* Seed the global xid once, lazily, from arc4random(). */
1841 if ((rpcclnt_xid == 0) && (rpcclnt_xid_touched == 0)) {
1842 rpcclnt_xid = arc4random();
1843 rpcclnt_xid_touched = 1;
/* Advance by a small non-zero random increment so xids never repeat
 * back-to-back and never step by zero. */
1845 while ((*xidp = arc4random() % 256) == 0);
1846 rpcclnt_xid += *xidp;
1850 *tl++ = *xidp = txdr_unsigned(rpcclnt_xid);
/* Program id, program version and procedure number, XDR-encoded. */
1854 *tl++ = txdr_unsigned(rc->rc_prog->prog_id);
1855 *tl++ = txdr_unsigned(rc->rc_prog->prog_version);
1856 *tl++ = txdr_unsigned(procid);
/* Append the credential + verifier via the auth module. */
1858 if ((error = rpcauth_buildheader(rc->rc_auth, cred, &mb, &bpos))) {
1859 RPCDEBUG("rpcauth_buildheader failed %d", error);
/* Finalize the packet header length over the whole chain. */
1865 mreq->m_pkthdr.len = m_length(mreq, NULL);
1866 mreq->m_pkthdr.rcvif = NULL;
1871 * Help break down an mbuf chain by setting the first siz bytes contiguous
1872 * pointed to by returned val. This is used by the macros rpcm_dissect and
1873 * rpcm_dissecton for tough cases. (The macros use the vars. dpos and dpos2)
/* NOTE(review): excerpt — the signature's return type, parameter
 * declarations and a number of interior statements are elided. */
1876 rpcm_disct(mdp, dposp, siz, left, cp2)
1883 struct mbuf *mp, *mp2;
/* Nothing left in this mbuf: step to the next one or fail with EBADRPC. */
1889 *mdp = mp = mp->m_next;
1891 RPC_RETURN(EBADRPC);
1893 *dposp = mtod(mp, caddr_t);
1898 } else if (mp->m_next == NULL) {
1899 RPC_RETURN(EBADRPC);
/* The contiguous span must fit in a plain mbuf's data area. */
1900 } else if (siz > MHLEN) {
1901 panic("rpc S too big");
/* Splice in a fresh mbuf and gather the requested bytes into it. */
1903 MGET(mp2, M_TRYWAIT, MT_DATA);
1904 mp2->m_next = mp->m_next;
1908 *cp2 = p = mtod(mp, caddr_t);
1909 bcopy(*dposp, p, left); /* Copy what was left */
1913 /* Loop around copying up the siz2 bytes */
1916 RPC_RETURN(EBADRPC);
1917 xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
1919 bcopy(mtod(mp2, caddr_t), p, xfer);
1930 *dposp = mtod(mp2, caddr_t);
/*
 * Look up the RTT-timer class for a procedure in the client's rc_proct
 * table.  The fallback return for a missing/short table is elided from
 * this excerpt (presumably 0, i.e. untimed — verify against full source).
 */
1938 rpcclnt_proct(rpc, procid)
1939 struct rpcclnt *rpc;
/* Guard against a missing table and out-of-range procedure numbers. */
1942 if (rpc->rc_proctlen != 0 && rpc->rc_proct != NULL &&
1943 procid < rpc->rc_proctlen) {
1944 return rpc->rc_proct[procid];
/*
 * Advance the mbuf-chain parse position (*mdp/*dposp) by offs bytes,
 * walking to following mbufs as needed.  Most of the body, including the
 * walking loop, is elided from this excerpt.
 */
1950 rpc_adv(mdp, dposp, offs, left)
/* Ran past the end of the chain: malformed RPC data. */
1965 RPC_RETURN(EBADRPC);
1969 *dposp = mtod(m, caddr_t) + offs;
/*
 * Cancel every outstanding task belonging to this client: soft-terminate
 * them, then poll (up to 30 ticks) until none remain on the global queue.
 * Interior lines (locking, loop bodies, return) are elided.
 */
1974 rpcclnt_cancelreqs(rpc)
1975 struct rpcclnt *rpc;
1977 struct rpctask *task;
/* Pass 1: mark all live tasks of this client as soft-terminated. */
1981 TAILQ_FOREACH(task, &rpctask_q, r_chain) {
1982 if (rpc != task->r_rpcclnt || task->r_mrep != NULL ||
1983 (task->r_flags & R_SOFTTERM))
1985 rpcclnt_softterm(task);
/* Pass 2: wait for the tasks to drain, sleeping on lbolt between scans. */
1989 for (i = 0; i < 30; i++) {
1991 TAILQ_FOREACH(task, &rpctask_q, r_chain) {
1992 if (rpc == task->r_rpcclnt)
1998 tsleep(&lbolt, PSOCK, "nfscancel", 0);
/*
 * Soft-terminate a task: flag it so the timer and reply paths abandon it,
 * and release its share of the congestion window if it was in flight.
 */
2004 rpcclnt_softterm(struct rpctask * task)
2006 task->r_flags |= R_SOFTTERM;
/* Undo the R_SENT accounting so rc_sent stays balanced. */
2007 if (task->r_flags & R_SENT) {
2008 task->r_rpcclnt->rc_sent -= RPC_CWNDSCALE;
2009 task->r_flags &= ~R_SENT;
2015 /* called by rpcclnt_get() */
/* Allocate a zero-filled rpcclnt; M_WAITOK means this cannot return NULL. */
2017 rpcclnt_create(struct rpcclnt ** rpc)
2019 MALLOC(*rpc, struct rpcclnt *, sizeof(struct rpcclnt), M_RPC, M_WAITOK | M_ZERO);
2022 /* called by rpcclnt_put() */
/* Free an rpcclnt; the FREE() call on the non-NULL path is elided here.
 * A NULL argument is only logged, not dereferenced. */
2024 rpcclnt_destroy(struct rpcclnt * rpc)
2029 RPCDEBUG("attempting to free a NULL rpcclnt (not dereferenced)");
2032 #endif /* !__OpenBSD__ */
2035 /* XXX: add a lock around the auth structure in struct rpcclnt and make this
2036 * call safe for calling during a connection */
/*
 * Append the RPC credential and verifier (RFC 1831 opaque_auth pairs) to
 * the request being built in *mhdr/*bp.  Only AUTH_UNIX/AUTH_NULL arms are
 * visible; this function continues past the end of this excerpt, and its
 * switch-case labels are elided.
 */
2038 rpcauth_buildheader(struct rpc_auth * auth, struct ucred * cred, struct mbuf ** mhdr, caddr_t * bp)
2040 size_t authsiz, verfsiz;
2041 uint32_t mlen, grpsiz;
2042 register struct mbuf *mb, *mb2;
2044 register u_int32_t *tl;
2047 if (auth == NULL || mhdr == NULL)
/* Size the credential body: AUTH_UNIX is stamp+machname+uid+gid+gids. */
2050 switch (auth->auth_type) {
2056 authsiz = (5 + cred->cr_ngroups) * RPCX_UNSIGNED;
2060 return EPROTONOSUPPORT;
/* Total: XDR-rounded cred + verf bodies plus the four flavor/len words. */
2064 mlen = rpcm_rndup(authsiz) + rpcm_rndup(verfsiz) + 4 * RPCX_UNSIGNED;
2069 rpcm_build(tl, u_int32_t *, mlen);
/* Credential: flavor then body length, then the flavor-specific body. */
2074 *tl++ = txdr_unsigned(auth->auth_type);
2075 *tl++ = txdr_unsigned(authsiz);
2076 switch (auth->auth_type) {
2081 *tl++ = txdr_unsigned(cred->cr_uid);
2082 *tl++ = txdr_unsigned(cred->cr_groups[0]);
2083 grpsiz = cred->cr_ngroups;
2084 *tl++ = txdr_unsigned(grpsiz);
2085 /* XXX: groups[0] is already sent... */
2086 for (i = 0 ; i < grpsiz ; i++) {
2087 *tl++ = txdr_unsigned(cred->cr_groups[i]);
2090 /* null verification header */
2091 *tl++ = txdr_unsigned(RPCAUTH_NULL);
2095 /* just a null verf header */
2096 *tl++ = txdr_unsigned(RPCAUTH_NULL);
/* Unknown flavor here means an internal invariant was broken upstream. */
2100 panic("inconsistent rpc auth type");