2 /* $Id: rpcclnt.c,v 1.9 2003/11/05 14:59:03 rees Exp $ */
6 * the regents of the university of michigan
9 * permission is granted to use, copy, create derivative works and redistribute
10 * this software and such derivative works for any purpose, so long as the name
11 * of the university of michigan is not used in any advertising or publicity
12 * pertaining to the use or distribution of this software without specific,
13 * written prior authorization. if the above copyright notice or any other
14 * identification of the university of michigan is included in any copy of any
15 * portion of this software, then the disclaimer below must also be included.
17 * this software is provided as is, without representation from the university
18 * of michigan as to its fitness for any purpose, and without warranty by the
19 * university of michigan of any kind, either express or implied, including
20 * without limitation the implied warranties of merchantability and fitness for
21 * a particular purpose. the regents of the university of michigan shall not be
22 * liable for any damages, including special, indirect, incidental, or
23 * consequential damages, with respect to any claim arising out of or in
24 * connection with the use of the software, even if it has been or is hereafter
25 * advised of the possibility of such damages.
29 * Copyright (c) 1989, 1991, 1993, 1995 The Regents of the University of
30 * California. All rights reserved.
32 * This code is derived from software contributed to Berkeley by Rick Macklem at
33 * The University of Guelph.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions are
37 * met: 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer. 2.
39 * Redistributions in binary form must reproduce the above copyright notice,
40 * this list of conditions and the following disclaimer in the documentation
41 * and/or other materials provided with the distribution. 3. All advertising
42 * materials mentioning features or use of this software must display the
43 * following acknowledgement: This product includes software developed by the
44 * University of California, Berkeley and its contributors. 4. Neither the
45 * name of the University nor the names of its contributors may be used to
46 * endorse or promote products derived from this software without specific
47 * prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
50 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
51 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
52 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
53 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
55 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
56 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
64 /* XXX: kill ugly debug strings */
65 /* XXX: get rid of proct, as it is not even being used... (or keep it so v{2,3}
66 * can run, but clean it up! */
68 #include <sys/param.h>
69 #include <sys/systm.h>
71 #include <sys/mount.h>
72 #include <sys/kernel.h>
74 #include <sys/syslog.h>
75 #include <sys/malloc.h>
78 #include <sys/signalvar.h>
79 #include <sys/sysent.h>
80 #include <sys/syscall.h>
81 #include <sys/sysctl.h>
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/mutex.h>
89 #include <netinet/in.h>
90 #include <netinet/tcp.h>
92 #include <nfs/rpcv2.h>
94 #include <rpc/rpcm_subs.h>
95 #include <rpc/rpcclnt.h>
97 /* memory management */
/*
 * NOTE(review): this extract is a sampled listing -- the embedded line
 * numbers skip, so some declarations/initializers below are missing lines
 * (e.g. closing braces of the string tables).  Do not treat gaps as bugs
 * without consulting the full file.
 */
99 struct pool rpctask_pool;	/* allocator pool for struct rpctask */
100 struct pool rpcclnt_pool;	/* allocator pool for struct rpcclnt */
101 #define RPCTASKPOOL_LWM 10	/* rpctask pool low-water mark */
102 #define RPCTASKPOOL_HWM 40	/* rpctask pool high-water mark */
104 static MALLOC_DEFINE(M_RPC, "rpcclnt", "rpc state");
/* Debug-traced return: logs the value via RPCDEBUG before returning it. */
107 #define RPC_RETURN(X) do { RPCDEBUG("returning %d", X); return X; }while(0)
110 * Estimate rto for an nfs rpc sent via. an unreliable datagram. Use the mean
111 * and mean deviation of rtt for the appropriate type of rpc for the frequent
112 * rpcs and a default for the others. The justification for doing "other"
113 * this way is that these rpcs happen so infrequently that timer est. would
114 * probably be stale. Also, since many of these rpcs are non-idempotent, a
115 * conservative timeout is desired. getattr, lookup - A+2D read, write -
116 * A+4D other - nm_timeo
/* RTO estimator; uses rc_srtt/rc_sdrtt per-procedure-class (middle line of
 * the conditional is missing from this extract). */
118 #define RPC_RTO(n, t) \
119 ((t) == 0 ? (n)->rc_timeo : \
121 (((((n)->rc_srtt[t-1] + 3) >> 2) + (n)->rc_sdrtt[t-1] + 1) >> 1) : \
122 ((((n)->rc_srtt[t-1] + 7) >> 3) + (n)->rc_sdrtt[t-1] + 1)))
/* Accessors for the smoothed rtt / deviation slot of a task's procedure
 * class (continuation lines missing from this extract). */
124 #define RPC_SRTT(s,r) (r)->r_rpcclnt->rc_srtt[rpcclnt_proct((s),\
127 #define RPC_SDRTT(s,r) (r)->r_rpcclnt->rc_sdrtt[rpcclnt_proct((s),\
132 * There is a congestion window for outstanding rpcs maintained per mount
133 * point. The cwnd size is adjusted in roughly the way that: Van Jacobson,
134 * Congestion avoidance and Control, In "Proceedings of SIGCOMM '88". ACM,
135 * August 1988. describes for TCP. The cwnd size is chopped in half on a
136 * retransmit timeout and incremented by 1/cwnd when each rpc reply is
137 * received and a full cwnd of rpcs is in progress. (The sent count and cwnd
138 * are scaled for integer arith.) Variants of "slow start" were tried and
139 * were found to be too much of a performance hit (ave. rtt 3 times larger),
140 * I suspect due to the large rtt that nfs rpcs have.
142 #define RPC_CWNDSCALE 256	/* fixed-point scale for rc_sent/rc_cwnd */
143 #define RPC_MAXCWND (RPC_CWNDSCALE * 32)
/* Exponential retransmit backoff, indexed by rc_timeouts - 1 (max 8). */
144 static const int rpcclnt_backoff[8] = {2, 4, 8, 16, 32, 64, 128, 256,};
146 /* XXX ugly debug strings */
147 #define RPC_ERRSTR_ACCEPTED_SIZE 6
/* Messages for RPC accept-status errors, indexed by accept_stat. */
148 char *rpc_errstr_accepted[RPC_ERRSTR_ACCEPTED_SIZE] = {
149 "", /* no good message... */
150 "remote server hasn't exported program.",
151 "remote server can't support version number.",
152 "program can't support procedure.",
153 "procedure can't decode params.",
154 "remote error. remote side memory allocation failure?"
/* Messages for RPC_MSGDENIED reject_stat values. */
157 char *rpc_errstr_denied[2] = {
158 "remote server doesnt support rpc version 2!",
159 "remote server authentication error."
162 #define RPC_ERRSTR_AUTH_SIZE 6
/* Messages for auth_stat failure codes.  NOTE(review): only five
 * initializers are visible here against a declared size of 6 -- the first
 * entry is presumably on a line missing from this extract; verify in the
 * full file. */
163 char *rpc_errstr_auth[RPC_ERRSTR_AUTH_SIZE] = {
165 "auth error: bad credential (seal broken).",
166 "auth error: client must begin new session.",
167 "auth error: bad verifier (seal broken).",
168 "auth error: verifier expired or replayed.",
169 "auth error: rejected for security reasons.",
173 * Static data, mostly RPC constants in XDR form
175 static u_int32_t rpc_reply, rpc_call, rpc_vers;
178 * rpc_msgdenied, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted,
179 * rpc_autherr, rpc_auth_kerb;
/* XID generator state; rpcclnt_xid_touched flags first-use initialization. */
182 static u_int32_t rpcclnt_xid = 0;
183 static u_int32_t rpcclnt_xid_touched = 0;
184 struct rpcstats rpcstats;
/* sysctl kern.rpc.* read-only counters mirroring struct rpcstats. */
187 SYSCTL_NODE(_kern, OID_AUTO, rpc, CTLFLAG_RD, 0, "RPC Subsystem");
189 SYSCTL_UINT(_kern_rpc, OID_AUTO, retries, CTLFLAG_RD, &rpcstats.rpcretries, 0, "retries");
190 SYSCTL_UINT(_kern_rpc, OID_AUTO, request, CTLFLAG_RD, &rpcstats.rpcrequests, 0, "request");
191 SYSCTL_UINT(_kern_rpc, OID_AUTO, timeouts, CTLFLAG_RD, &rpcstats.rpctimeouts, 0, "timeouts");
192 SYSCTL_UINT(_kern_rpc, OID_AUTO, unexpected, CTLFLAG_RD, &rpcstats.rpcunexpected, 0, "unexpected");
193 SYSCTL_UINT(_kern_rpc, OID_AUTO, invalid, CTLFLAG_RD, &rpcstats.rpcinvalid, 0, "invalid");
198 SYSCTL_UINT(_kern_rpc, OID_AUTO, debug_on, CTLFLAG_RW, &rpcdebugon, 0, "RPC Debug messages");
202 * Queue head for rpctask's
/* Global list of outstanding RPC tasks; oldest first (timer relies on it). */
205 TAILQ_HEAD(, rpctask) rpctask_q;
206 struct callout rpcclnt_callout;
/* Forward declarations.  NOTE(review): the duplicated send/receive and
 * realign prototypes below are almost certainly alternate #ifdef
 * (OpenBSD vs FreeBSD) arms whose preprocessor lines are missing from
 * this extract. */
209 static int rpcclnt_send(struct socket *, struct mbuf *, struct mbuf *, struct rpctask *);
210 static int rpcclnt_receive(struct rpctask *, struct mbuf **, struct mbuf **, RPC_EXEC_CTX);
212 static int rpcclnt_send(struct socket *, struct sockaddr *, struct mbuf *, struct rpctask *);
213 static int rpcclnt_receive(struct rpctask *, struct sockaddr **, struct mbuf **, RPC_EXEC_CTX);
216 static int rpcclnt_msg(RPC_EXEC_CTX, const char *, char *);
218 static int rpcclnt_reply(struct rpctask *, RPC_EXEC_CTX);
219 static void rpcclnt_timer(void *);
220 static int rpcclnt_sndlock(int *, struct rpctask *);
221 static void rpcclnt_sndunlock(int *);
222 static int rpcclnt_rcvlock(struct rpctask *);
223 static void rpcclnt_rcvunlock(int *);
225 void rpcclnt_realign(struct mbuf *, int);
227 static void rpcclnt_realign(struct mbuf **, int);
230 static struct mbuf *rpcclnt_buildheader(struct rpcclnt *, int, struct mbuf *, u_int32_t, int *, struct mbuf **, struct ucred *);
231 static int rpcm_disct(struct mbuf **, caddr_t *, int, int, caddr_t *);
232 static u_int32_t rpcclnt_proct(struct rpcclnt *, u_int32_t);
233 static int rpc_adv(struct mbuf **, caddr_t *, int, int);
234 static void rpcclnt_softterm(struct rpctask * task);
236 static int rpcauth_buildheader(struct rpc_auth * auth, struct ucred *, struct mbuf **, caddr_t *);
/* OpenBSD timeout used in place of the FreeBSD callout above. */
242 static struct timeout rpcclnt_timer_to;
/*
 * rpcclnt_init() body (the function header line is missing from this
 * extract).  One-time subsystem initialization: derives the timer tick
 * interval from hz, zeroes the stats counters, precomputes the XDR-encoded
 * RPC constants, and sets up the task queue, pools and retransmit timer
 * (OpenBSD timeout vs FreeBSD callout, selected by #ifdef arms partially
 * missing here).
 */
245 rpcclnt_ticks = (hz * RPC_TICKINTVL + 500) / 1000;	/* ms -> ticks, rounded */
246 if (rpcclnt_ticks < 1)
248 rpcstats.rpcretries = 0;
249 rpcstats.rpcrequests = 0;
250 rpcstats.rpctimeouts = 0;
251 rpcstats.rpcunexpected = 0;
252 rpcstats.rpcinvalid = 0;
255 * rpc constants how about actually using more than one of these!
/* Pre-byte-swap the wire constants once so the hot paths can compare
 * against network-order words directly. */
258 rpc_reply = txdr_unsigned(RPC_REPLY);
259 rpc_vers = txdr_unsigned(RPC_VER2);
260 rpc_call = txdr_unsigned(RPC_CALL);
262 rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
263 rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
264 rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
265 rpc_autherr = txdr_unsigned(RPC_AUTHERR);
266 rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
267 rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
270 /* initialize rpctask queue */
271 TAILQ_INIT(&rpctask_q);
274 /* initialize pools */
275 pool_init(&rpctask_pool, sizeof(struct rpctask), 0, 0, RPCTASKPOOL_LWM,
277 pool_setlowat(&rpctask_pool, RPCTASKPOOL_LWM);
278 pool_sethiwat(&rpctask_pool, RPCTASKPOOL_HWM);
280 pool_init(&rpcclnt_pool, sizeof(struct rpcclnt), 0, 0, 1, "rpcclnt_p", NULL);
282 /* initialize timers */
283 timeout_set(&rpcclnt_timer_to, rpcclnt_timer, &rpcclnt_timer_to);
284 rpcclnt_timer(&rpcclnt_timer_to);	/* prime the timer immediately */
285 #else /* !__OpenBSD__ */
286 callout_init(&rpcclnt_callout, 0);
287 #endif /* !__OpenBSD__ */
289 RPCDEBUG("rpc initialed");	/* sic -- runtime string left as-is */
/*
 * rpcclnt_cleanup() fragment (header missing from this extract):
 * stops the retransmit callout on module teardown.
 */
298 /* XXX delete sysctl variables? */
299 callout_stop(&rpcclnt_callout);
/*
 * rpcclnt_setup -- fill in a client handle prior to rpcclnt_connect().
 * Validates the required pointers (clnt, program, addr, auth and the
 * program name), then records the target program, server address, socket
 * type/protocol, auth object, max read/write sizes and flags.  K&R-style
 * definition; several parameter-declaration and return lines are missing
 * from this extract.  Performs no allocation or I/O itself.
 */
303 rpcclnt_setup(clnt, program, addr, sotype, soproto, auth, max_read_size, max_write_size, flags)
304 struct rpcclnt * clnt;
305 struct rpc_program * program;
306 struct sockaddr * addr;
309 struct rpc_auth * auth;
/* Reject NULL handles up front (error-return lines not visible here). */
314 if (clnt == NULL || program == NULL || addr == NULL || auth == NULL)
317 if (program->prog_name == NULL)
319 clnt->rc_prog = program;
321 clnt->rc_name = addr;	/* caller retains ownership of the sockaddr */
322 clnt->rc_sotype = sotype;
323 clnt->rc_soproto = soproto;
324 clnt->rc_auth = auth;
325 clnt->rc_rsize = max_read_size;
326 clnt->rc_wsize = max_write_size;
327 clnt->rc_flag = flags;
/* No per-procedure timing table until the caller installs one. */
329 clnt->rc_proctlen = 0;
330 clnt->rc_proct = NULL;
336 * Initialize sockets and congestion for a new RPC connection. We do not free
337 * the sockaddr if error.
/*
 * rpcclnt_connect: creates rc_so via socreate(), binds it to a reserved
 * local port for AF_INET, optionally soconnect()s, sets socket timeouts
 * and buffer reservations, and seeds the srtt/sdrtt/cwnd congestion
 * state.  Paired OpenBSD (mbuf-option) and FreeBSD (struct sockopt)
 * code sequences appear back to back; their #ifdef lines are missing
 * from this extract.  On error falls through to rpcclnt_disconnect().
 */
340 rpcclnt_connect(rpc, td)
345 int s, error, rcvreserve, sndreserve;
346 struct sockaddr *saddr;
349 struct sockaddr_in *sin;
352 struct sockaddr_in sin;
359 RPCDEBUG("no rpcclnt struct!\n");
363 /* create the socket */
366 saddr = rpc->rc_name;
369 error = socreate(saddr->sa_family, &rpc->rc_so, rpc->rc_sotype,
370 rpc->rc_soproto, td->td_ucred, td);
374 RPCDEBUG("error %d in socreate()", error);
378 rpc->rc_soflags = so->so_proto->pr_flags;
381 * Some servers require that the client port be a reserved port
382 * number. We always allocate a reserved port, as this prevents
383 * filehandle disclosure through UDP port capture.
385 if (saddr->sa_family == AF_INET) {
/* OpenBSD-style: pass the option in an mbuf. */
392 MGET(mopt, M_TRYWAIT, MT_SOOPTS);
393 mopt->m_len = sizeof(int);
394 ip = mtod(mopt, int *);
395 *ip = IP_PORTRANGE_LOW;
397 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
/* FreeBSD-style: struct sockopt equivalent of the above. */
399 soarg = IP_PORTRANGE_LOW;
400 bzero(&opt, sizeof(struct sockopt));
401 opt.sopt_dir = SOPT_SET;
402 opt.sopt_level = IPPROTO_IP;
403 opt.sopt_name = IP_PORTRANGE;
404 opt.sopt_val = &soarg;
405 opt.sopt_valsize = sizeof(soarg);
407 error = sosetopt(so, &opt);
/* Bind to INADDR_ANY port 0; the PORTRANGE_LOW option above makes the
 * kernel pick a reserved (privileged) ephemeral port. */
413 MGET(m, M_TRYWAIT, MT_SONAME);
414 sin = mtod(m, struct sockaddr_in *);
415 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
416 sin->sin_family = AF_INET;
417 sin->sin_addr.s_addr = INADDR_ANY;
418 sin->sin_port = htons(0);
419 error = sobind(so, m);
422 sin.sin_len = sizeof(struct sockaddr_in);
423 sin.sin_family = AF_INET;
424 sin.sin_addr.s_addr = INADDR_ANY;
425 sin.sin_port = htons(0);
427 * &thread0 gives us root credentials to ensure sobind
428 * will give us a reserved ephemeral port.
430 error = sobind(so, (struct sockaddr *) & sin, &thread0);
/* Restore the default port range so later sockets are unaffected. */
436 MGET(mopt, M_TRYWAIT, MT_SOOPTS);
437 mopt->m_len = sizeof(int);
438 ip = mtod(mopt, int *);
439 *ip = IP_PORTRANGE_DEFAULT;
440 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
442 soarg = IP_PORTRANGE_DEFAULT;
443 bzero(&opt, sizeof(struct sockopt));
444 opt.sopt_dir = SOPT_SET;
445 opt.sopt_level = IPPROTO_IP;
446 opt.sopt_name = IP_PORTRANGE;
447 opt.sopt_val = &soarg;
448 opt.sopt_valsize = sizeof(soarg);
449 error = sosetopt(so, &opt);
455 * Protocols that do not require connections may be optionally left
456 * unconnected for servers that reply from a port other than
459 if (rpc->rc_flag & RPCCLNT_NOCONN) {
460 if (rpc->rc_soflags & PR_CONNREQUIRED) {
465 error = soconnect(so, saddr, td);
470 * Wait for the connection to complete. Cribbed from the
471 * connect system call but with the wait timing out so that
472 * interruptible mounts don't hang here for a long time.
479 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
480 (void)tsleep((caddr_t) & so->so_timeo, PSOCK,
484 * XXX needs to catch interrupt signals. something
485 * like this: if ((so->so_state & SS_ISCONNECTING) &&
486 * so->so_error == 0 && rep && (error =
487 * nfs_sigintr(nmp, rep, rep->r_td)) != 0) {
488 * so->so_state &= ~SS_ISCONNECTING; splx(s); goto
493 error = so->so_error;
/* Soft/interruptible mounts get 5s socket timeouts; hard mounts block. */
500 if (rpc->rc_flag & (RPCCLNT_SOFT | RPCCLNT_INT)) {
501 so->so_rcv.sb_timeo = (5 * hz);
502 so->so_snd.sb_timeo = (5 * hz);
504 so->so_rcv.sb_timeo = 0;
505 so->so_snd.sb_timeo = 0;
/* Size socket buffers by socket type: one packet for DGRAM, two for
 * SEQPACKET, two records (incl. record marks) for STREAM below. */
509 if (rpc->rc_sotype == SOCK_DGRAM) {
510 sndreserve = rpc->rc_wsize + RPC_MAXPKTHDR;
511 rcvreserve = rpc->rc_rsize + RPC_MAXPKTHDR;
512 } else if (rpc->rc_sotype == SOCK_SEQPACKET) {
513 sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR) * 2;
514 rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR) * 2;
516 if (rpc->rc_sotype != SOCK_STREAM)
517 panic("rpcclnt_connect() bad sotype");
518 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
520 MGET(m, M_TRYWAIT, MT_SOOPTS);
521 *mtod(m, int32_t *) = 1;
522 m->m_len = sizeof(int32_t);
523 sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
527 bzero(&opt, sizeof(struct sockopt));
528 opt.sopt_dir = SOPT_SET;
529 opt.sopt_level = SOL_SOCKET;
530 opt.sopt_name = SO_KEEPALIVE;
531 opt.sopt_val = &soarg;
532 opt.sopt_valsize = sizeof(soarg);
/* Disable Nagle on TCP -- RPC is request/response latency-sensitive. */
536 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
538 MGET(m, M_TRYWAIT, MT_SOOPTS);
539 *mtod(m, int32_t *) = 1;
540 m->m_len = sizeof(int32_t);
541 sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
545 bzero(&opt, sizeof(struct sockopt));
546 opt.sopt_dir = SOPT_SET;
547 opt.sopt_level = IPPROTO_TCP;
548 opt.sopt_name = TCP_NODELAY;
549 opt.sopt_val = &soarg;
550 opt.sopt_valsize = sizeof(soarg);
554 sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR +
555 sizeof(u_int32_t)) * 2;
556 rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR +
557 sizeof(u_int32_t)) * 2;
559 error = soreserve(so, sndreserve, rcvreserve);
562 so->so_rcv.sb_flags |= SB_NOINTR;
563 so->so_snd.sb_flags |= SB_NOINTR;
565 /* Initialize other non-zero congestion variables */
566 rpc->rc_srtt[0] = rpc->rc_srtt[1] = rpc->rc_srtt[2] =
567 rpc->rc_srtt[3] = (RPC_TIMEO << 3);	/* srtt kept <<3 fixed-point */
568 rpc->rc_sdrtt[0] = rpc->rc_sdrtt[1] = rpc->rc_sdrtt[2] =
569 rpc->rc_sdrtt[3] = 0;
570 rpc->rc_cwnd = RPC_MAXCWND / 2; /* Initial send window */
572 rpc->rc_timeouts = 0;
/* Error path: tear down whatever was set up (label line not visible). */
576 rpcclnt_disconnect(rpc);
583 * Called when a connection is broken on a reliable protocol.
584 * - clean up the old socket
585 * - rpcclnt_connect() again
586 * - set R_MUSTRESEND for all outstanding requests on mount point
587 * If this fails the mount point is DEAD!
588 * nb: Must be called with the rpcclnt_sndlock() set on the mount point.
591 rpcclnt_reconnect(rep, td)
596 struct rpcclnt *rpc = rep->r_rpcclnt;
/* Retry rpcclnt_connect() until it succeeds or the caller is signalled,
 * sleeping one lbolt interval between attempts. */
599 rpcclnt_disconnect(rpc);
600 while ((error = rpcclnt_connect(rpc, td)) != 0) {
601 if (error == EINTR || error == ERESTART)
603 tsleep(&lbolt, PSOCK, "rpccon", 0);
607 * Loop through outstanding request list and fix up all requests on
/* Mark every task of this client for retransmission on the new socket. */
610 for (rp = TAILQ_FIRST(&rpctask_q); rp != NULL;
611 rp = TAILQ_NEXT(rp, r_chain)) {
612 if (rp->r_rpcclnt == rpc)
613 rp->r_flags |= R_MUSTRESEND;
619 * RPC transport disconnect. Clean up and unlink.
/* rpcclnt_disconnect: body not visible in this extract beyond the header. */
622 rpcclnt_disconnect(rpc)
/*
 * rpcclnt_safedisconnect: disconnect while holding the receive lock, using
 * a zeroed stack rpctask as the lock owner so concurrent receivers are
 * excluded during teardown.
 */
638 rpcclnt_safedisconnect(struct rpcclnt * rpc)
640 struct rpctask dummytask;
642 bzero(&dummytask, sizeof(dummytask));
643 dummytask.r_rpcclnt = rpc;
644 rpcclnt_rcvlock(&dummytask);
645 rpcclnt_disconnect(rpc);
646 rpcclnt_rcvunlock(&rpc->rc_flag);
650 * This is the rpc send routine. For connection based socket types, it
651 * must be called with an rpcclnt_sndlock() on the socket.
652 * "rep == NULL" indicates that it has been called from a server.
653 * For the client side:
654 * - return EINTR if the RPC is terminated, 0 otherwise
655 * - set R_MUSTRESEND if the send fails for any reason
656 * - do any cleanup required by recoverable socket errors (?)
657 * For the server side:
658 * - return EINTR or ERESTART if interrupted by a signal
659 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
660 * - do any cleanup required by recoverable socket errors (?)
663 rpcclnt_send(so, nam, top, rep)
668 struct sockaddr *nam;
/* Dual sendnam declarations: mbuf (OpenBSD) vs sockaddr (FreeBSD) arms;
 * the #ifdef lines are missing from this extract. */
674 struct mbuf *sendnam;
676 struct sockaddr *sendnam;
677 struct thread *td = curthread;
679 int error, soflags, flags;
/* Client-side preamble: bail on terminated tasks, flag for resend if the
 * socket vanished (e.g. mid-reconnect), else clear the resend flag. */
682 if (rep->r_flags & R_SOFTTERM) {
686 if ((so = rep->r_rpcclnt->rc_so) == NULL) {
687 rep->r_flags |= R_MUSTRESEND;
691 rep->r_flags &= ~R_MUSTRESEND;
692 soflags = rep->r_rpcclnt->rc_soflags;
694 soflags = so->so_proto->pr_flags;
/* Connected or connection-oriented sockets need no destination addr. */
696 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
701 if (so->so_type == SOCK_SEQPACKET)
707 * XXXRW: If/when this code becomes MPSAFE itself, Giant might have
708 * to be conditionally acquired earlier for the stack so has to avoid
709 * lock order reversals with any locks held over rpcclnt_send().
712 error = sosend(so, sendnam, NULL, top, NULL, flags, td);
717 log(LOG_INFO, "rpc send error %d for service %s\n", error,
718 rep->r_rpcclnt->rc_prog->prog_name);
720 * Deal with errors for the client side.
722 if (rep->r_flags & R_SOFTTERM)
725 rep->r_flags |= R_MUSTRESEND;	/* timer will retransmit */
727 log(LOG_INFO, "rpc service send error %d\n", error);
730 * Handle any recoverable (soft) socket errors here.
/* Anything other than EINTR/ERESTART/EWOULDBLOCK/EPIPE is swallowed as a
 * recoverable soft error (handling lines not visible here). */
732 if (error != EINTR && error != ERESTART &&
733 error != EWOULDBLOCK && error != EPIPE)
740 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all done by
741 * soreceive(), but for SOCK_STREAM we must deal with the Record Mark and
742 * consolidate the data into a new mbuf list. nb: Sometimes TCP passes the
743 * data up to soreceive() in long lists of small mbufs. For SOCK_STREAM we
744 * must be very careful to read an entire record once we have read any of it,
745 * even if the system call has been interrupted.
748 rpcclnt_receive(rep, aname, mp, td)
753 struct sockaddr **aname;
762 struct mbuf *control;
/* getnam: mbuf** (OpenBSD) vs sockaddr** (FreeBSD) #ifdef arms. */
765 struct mbuf **getnam;
767 struct sockaddr **getnam;
769 int error, sotype, rcvflg;
772 * Set up arguments for soreceive()
776 sotype = rep->r_rpcclnt->rc_sotype;
779 * For reliable protocols, lock against other senders/receivers in
780 * case a reconnect is necessary. For SOCK_STREAM, first get the
781 * Record Mark to find out how much more there is to get. We must
782 * lock the socket against other receivers until we have an entire
785 if (sotype != SOCK_DGRAM) {
786 error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
791 * Check for fatal errors and resending request.
794 * Ugh: If a reconnect attempt just happened, rc_so would
795 * have changed. NULL indicates a failed attempt that has
796 * essentially shut down this mount point.
798 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
799 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
802 so = rep->r_rpcclnt->rc_so;
804 error = rpcclnt_reconnect(rep, td);
806 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
/* Retransmit a fresh copy of the request while R_MUSTRESEND is set,
 * reconnecting on failure. */
811 while (rep->r_flags & R_MUSTRESEND) {
812 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_TRYWAIT);
813 rpcstats.rpcretries++;
814 error = rpcclnt_send(so, rep->r_rpcclnt->rc_name, m, rep);
816 if (error == EINTR || error == ERESTART ||
817 (error = rpcclnt_reconnect(rep, td)) != 0) {
818 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
824 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
/* SOCK_STREAM: read the 4-byte record mark first to learn record length. */
825 if (sotype == SOCK_STREAM) {
826 aio.iov_base = (caddr_t) & len;
827 aio.iov_len = sizeof(u_int32_t);
830 auio.uio_segflg = UIO_SYSSPACE;
831 auio.uio_rw = UIO_READ;
833 auio.uio_resid = sizeof(u_int32_t);
840 rcvflg = MSG_WAITALL;
842 error = soreceive(so, NULL, &auio, NULL, NULL, &rcvflg);
844 if (error == EWOULDBLOCK && rep) {
845 if (rep->r_flags & R_SOFTTERM)
848 } while (error == EWOULDBLOCK);
849 if (!error && auio.uio_resid > 0) {
851 "short receive (%zu/%zu) from rpc server %s\n",
852 sizeof(u_int32_t) - auio.uio_resid,
854 rep->r_rpcclnt->rc_prog->prog_name);
/* Strip the last-fragment bit; low 31 bits are the record length. */
859 len = ntohl(len) & ~0x80000000;
861 * This is SERIOUS! We are out of sync with the
862 * sender and forcing a disconnect/reconnect is all I
865 if (len > RPC_MAXPACKET) {
866 log(LOG_ERR, "%s (%d) from rpc server %s\n",
867 "impossible packet length",
869 rep->r_rpcclnt->rc_prog->prog_name);
/* Read the full record body; MSG_WAITALL keeps soreceive collecting. */
873 auio.uio_resid = len;
875 rcvflg = MSG_WAITALL;
877 error = soreceive(so, NULL, &auio, mp, NULL, &rcvflg);
879 } while (error == EWOULDBLOCK || error == EINTR ||
881 if (!error && auio.uio_resid > 0) {
883 "short receive (%d/%d) from rpc server %s\n",
884 len - auio.uio_resid, len,
885 rep->r_rpcclnt->rc_prog->prog_name);
890 * NB: Since uio_resid is big, MSG_WAITALL is ignored
891 * and soreceive() will return when it has either a
892 * control msg or a data msg. We have no use for
893 * control msg., but must grab them and then throw
894 * them away so we know what is going on.
/* Non-stream reliable protocols (e.g. SEQPACKET): read one whole message. */
896 auio.uio_resid = len = 100000000; /* Anything Big */
905 error = soreceive(so, NULL, &auio, mp, &control, &rcvflg);
909 if (error == EWOULDBLOCK && rep) {
910 if (rep->r_flags & R_SOFTTERM)
913 } while (error == EWOULDBLOCK ||
914 (!error && *mp == NULL && control));
915 if ((rcvflg & MSG_EOR) == 0)
917 if (!error && *mp == NULL)
919 len -= auio.uio_resid;
/* Hard error on a reliable socket: drop partial data, log, reconnect. */
922 if (error && error != EINTR && error != ERESTART) {
924 *mp = (struct mbuf *) 0;
927 "receive error %d from rpc server %s\n",
929 rep->r_rpcclnt->rc_prog->prog_name);
930 error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
932 error = rpcclnt_reconnect(rep, td);
/* SOCK_DGRAM path: a single soreceive() does all the work. */
937 if ((so = rep->r_rpcclnt->rc_so) == NULL)
939 if (so->so_state & SS_ISCONNECTED)
943 auio.uio_resid = len = 1000000;
953 error = soreceive(so, getnam, &auio, mp, NULL, &rcvflg);
955 RPCDEBUG("soreceive returns %d", error);
956 if (error == EWOULDBLOCK && (rep->r_flags & R_SOFTTERM)) {
957 RPCDEBUG("wouldblock && softerm -> EINTR");
960 } while (error == EWOULDBLOCK);
961 len -= auio.uio_resid;
968 * Search for any mbufs that are not a multiple of 4 bytes
969 * long or with m_data not longword aligned. These could
970 * cause pointer alignment problems, so copy them to well
973 rpcclnt_realign(mp, 5 * RPCX_UNSIGNED);
980 * Implement receipt of reply on a socket. We must search through the list of
981 * received datagrams matching them with outstanding requests using the xid,
982 * until ours is found.
986 rpcclnt_reply(myrep, td)
987 struct rpctask *myrep;
991 struct rpcclnt *rpc = myrep->r_rpcclnt;
993 struct mbuf *mrep, *md;
997 struct sockaddr *nam;
1004 * Loop around until we get our own reply
1008 * Lock against other receivers so that I don't get stuck in
1009 * sbwait() after someone else has received my reply for me.
1010 * Also necessary for connection based protocols to avoid
1011 * race conditions during a reconnect.
1013 error = rpcclnt_rcvlock(myrep);
1016 /* Already received, bye bye */
1017 if (myrep->r_mrep != NULL) {
1018 rpcclnt_rcvunlock(&rpc->rc_flag);
1022 * Get the next Rpc reply off the socket
1024 error = rpcclnt_receive(myrep, &nam, &mrep, td);
1026 rpcclnt_rcvunlock(&rpc->rc_flag);
1030 * Ignore routing errors on connectionless
1033 if (RPCIGNORE_SOERROR(rpc->rc_soflags, error)) {
1034 rpc->rc_so->so_error = 0;
1035 if (myrep->r_flags & R_GETONEREP)
1037 RPCDEBUG("ingoring routing error on connectionless protocol.");
1047 FREE(nam, M_SONAME);
1051 * Get the xid and check that it is an rpc reply
/* Peel xid + message type off the front of the reply mbuf chain. */
1054 dpos = mtod(md, caddr_t);
1055 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1057 if (*tl != rpc_reply) {
1058 rpcstats.rpcinvalid++;
1061 if (myrep->r_flags & R_GETONEREP)
1066 * Loop through the request list to match up the reply Iff no
1067 * match, just drop the datagram
1069 TAILQ_FOREACH(rep, &rpctask_q, r_chain) {
1070 if (rep->r_mrep == NULL && rxid == rep->r_xid) {
1077 * Update congestion window. Do the additive
1078 * increase of one rpc/rtt.
1080 if (rpc->rc_cwnd <= rpc->rc_sent) {
1082 (RPC_CWNDSCALE * RPC_CWNDSCALE +
1083 (rpc->rc_cwnd >> 1)) / rpc->rc_cwnd;
1084 if (rpc->rc_cwnd > RPC_MAXCWND)
1085 rpc->rc_cwnd = RPC_MAXCWND;
1087 rep->r_flags &= ~R_SENT;
1088 rpc->rc_sent -= RPC_CWNDSCALE;
1090 * Update rtt using a gain of 0.125 on the
1091 * mean and a gain of 0.25 on the deviation.
1093 if (rep->r_flags & R_TIMING) {
1095 * Since the timer resolution of
1096 * NFS_HZ is so coarse, it can often
1097 * result in r_rtt == 0. Since r_rtt
1098 * == N means that the actual rtt is
1099 * between N+dt and N+2-dt ticks, add
/* Standard Jacobson/Karels srtt update (srtt <<3, sdrtt <<2 fixed-point). */
1102 t1 = rep->r_rtt + 1;
1103 t1 -= (RPC_SRTT(rpc, rep) >> 3);
1104 RPC_SRTT(rpc, rep) += t1;
1107 t1 -= (RPC_SDRTT(rpc, rep) >> 2);
1108 RPC_SDRTT(rpc, rep) += t1;
1110 rpc->rc_timeouts = 0;	/* reply received: reset backoff */
1115 * If not matched to a request, drop it. If it's mine, get
1119 rpcstats.rpcunexpected++;
1120 RPCDEBUG("rpc reply not matched\n");
1122 } else if (rep == myrep) {
1123 if (rep->r_mrep == NULL)
1124 panic("rpcreply nil");
1127 if (myrep->r_flags & R_GETONEREP)
1132 /* XXX: ignores tryagain! */
1134 * code from nfs_request - goes something like this
1135 * - fill in task struct
1136 * - links task into list
1137 * - calls rpcclnt_send() for first transmit
1138 * - calls rpcclnt_reply() to get reply
1139 * - fills in reply (which should be initialized prior to
1140 * calling), which is valid when 0 is returned and is
1141 * NEVER freed in this function
1143 * nb: always frees the request header, but NEVER frees 'mrest'
1145 * rpcclnt_setauth() should be used before calling this. EAUTH is returned if
1146 * authentication fails.
1148 * note that reply->result_* are invalid unless reply->type ==
1149 * RPC_MSGACCEPTED and reply->status == RPC_SUCCESS and that reply->verf_*
1150 * are invalid unless reply->type == RPC_MSGACCEPTED
1153 rpcclnt_request(rpc, mrest, procnum, td, cred, reply)
1154 struct rpcclnt *rpc;
1159 struct rpc_reply *reply;
1161 struct mbuf *m, *mrep;
1162 struct rpctask *task;
1164 struct mbuf *md, *mheadend;
1166 int t1, s, error = 0, mrest_len;
/* Allocate the task: pool_get (OpenBSD) vs MALLOC (FreeBSD) arms. */
1170 task = pool_get(&rpctask_pool, PR_WAITOK);
1172 MALLOC(task, struct rpctask *, sizeof(struct rpctask), M_RPC, (M_WAITOK | M_ZERO));
1175 task->r_rpcclnt = rpc;
1176 task->r_procnum = procnum;
1179 mrest_len = m_length(mrest, NULL);
/* Prepend the RPC call header (xid, prog/vers/proc, auth) to mrest. */
1181 m = rpcclnt_buildheader(rpc, procnum, mrest, mrest_len, &xid, &mheadend,
1185 * For stream protocols, insert a Sun RPC Record Mark.
1187 if (rpc->rc_sotype == SOCK_STREAM) {
1188 M_PREPEND(m, RPCX_UNSIGNED, M_TRYWAIT);
1189 *mtod(m, u_int32_t *) = htonl(0x80000000 |
1190 (m->m_pkthdr.len - RPCX_UNSIGNED));	/* last-fragment bit | length */
/* Soft mounts obey rc_retry; hard mounts effectively retry forever. */
1195 if (rpc->rc_flag & RPCCLNT_SOFT)
1196 task->r_retry = rpc->rc_retry;
1198 task->r_retry = RPC_MAXREXMIT + 1; /* past clip limit */
1199 task->r_rtt = task->r_rexmit = 0;
/* Only procedures with a timing-class entry get rtt measurement. */
1201 if (rpcclnt_proct(rpc, procnum) > 0)
1202 task->r_flags = R_TIMING;
1205 task->r_mrep = NULL;
1208 * Do the client side RPC.
1210 rpcstats.rpcrequests++;
1213 * Chain request into list of outstanding requests. Be sure to put it
1214 * LAST so timer finds oldest requests first.
/* First outstanding task arms the retransmit timer. */
1217 if (TAILQ_EMPTY(&rpctask_q))
1218 callout_reset(&rpcclnt_callout, rpcclnt_ticks, rpcclnt_timer,
1220 TAILQ_INSERT_TAIL(&rpctask_q, task, r_chain);
1223 * If backing off another request or avoiding congestion, don't send
1224 * this one now but let timer do it. If not timing a request, do it
1227 if (rpc->rc_so && (rpc->rc_sotype != SOCK_DGRAM ||
1228 (rpc->rc_flag & RPCCLNT_DUMBTIMR) ||
1229 rpc->rc_sent < rpc->rc_cwnd)) {
1232 if (rpc->rc_soflags & PR_CONNREQUIRED)
1233 error = rpcclnt_sndlock(&rpc->rc_flag, task);
/* Send a copy; the original r_mreq is kept for retransmission. */
1235 error = rpcclnt_send(rpc->rc_so, rpc->rc_name,
1236 m_copym(m, 0, M_COPYALL, M_TRYWAIT),
1238 if (rpc->rc_soflags & PR_CONNREQUIRED)
1239 rpcclnt_sndunlock(&rpc->rc_flag);
1241 if (!error && (task->r_flags & R_MUSTRESEND) == 0) {
1242 rpc->rc_sent += RPC_CWNDSCALE;	/* charge one rpc to the window */
1243 task->r_flags |= R_SENT;
1251 * Wait for the reply from our send or the timer's.
1253 if (!error || error == EPIPE)
1254 error = rpcclnt_reply(task, td);
1257 * RPC done, unlink the request.
1260 TAILQ_REMOVE(&rpctask_q, task, r_chain);
1261 if (TAILQ_EMPTY(&rpctask_q))
1262 callout_stop(&rpcclnt_callout);	/* no tasks left: disarm timer */
1266 * Decrement the outstanding request count.
1268 if (task->r_flags & R_SENT) {
1269 task->r_flags &= ~R_SENT; /* paranoia */
1270 rpc->rc_sent -= RPC_CWNDSCALE;
1273 * If there was a successful reply and a tprintf msg. tprintf a
1276 if (!error && (task->r_flags & R_TPRINTFMSG)) {
1278 rpcclnt_msg(task->r_td, rpc->rc_prog->prog_name,
1283 /* free request header (leaving mrest) */
1284 mheadend->m_next = NULL;	/* detach caller's mrest before freeing */
1285 m_freem(task->r_mreq);
1287 /* initialize reply */
1288 reply->mrep = task->r_mrep;
1289 reply->verf_md = NULL;
1290 reply->result_md = NULL;
1292 mrep = task->r_mrep;
1294 dpos = task->r_dpos;
1296 /* task structure is no longer needed */
1298 pool_put(&rpctask_pool, task);
1307 * break down the rpc header and check if ok
1310 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1311 reply->stat.type = fxdr_unsigned(u_int32_t, *tl);
1313 if (reply->stat.type == RPC_MSGDENIED) {
1314 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1315 reply->stat.status = fxdr_unsigned(u_int32_t, *tl);
1317 switch (reply->stat.status) {
/* RPC_MISMATCH: server reports supported version range. */
1319 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1320 reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++);
1321 reply->stat.mismatch_info.high = fxdr_unsigned(u_int32_t, *tl);
/* RPC_AUTHERR: extract the auth_stat code. */
1325 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1326 reply->stat.autherr = fxdr_unsigned(u_int32_t, *tl);
1334 } else if (reply->stat.type != RPC_MSGACCEPTED) {
/* Accepted message: parse the verifier (flavor + opaque length). */
1339 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1341 reply->verf_md = md;
1342 reply->verf_dpos = dpos;
1344 reply->verf_type = fxdr_unsigned(u_int32_t, *tl++);
1345 reply->verf_size = fxdr_unsigned(u_int32_t, *tl);
1347 if (reply->verf_size != 0)
1348 rpcm_adv(rpcm_rndup(reply->verf_size));	/* skip opaque verifier body */
1350 rpcm_dissect(tl, u_int32_t *, RPCX_UNSIGNED);
1351 reply->stat.status = fxdr_unsigned(u_int32_t, *tl);
1353 if (reply->stat.status == RPC_SUCCESS) {
/* If dpos sits at the end of this mbuf, the results begin in the next. */
1354 if ((uint32_t)(dpos - mtod(md, caddr_t)) >= md->m_len) {
1355 RPCDEBUG("where is the next mbuf?");
1356 RPCDEBUG("%d -> %d",
1357 (int)(dpos - mtod(md, caddr_t)), md->m_len);
1358 if (md->m_next == NULL) {
1362 reply->result_md = md->m_next;
1363 reply->result_dpos = mtod(reply->result_md,
1367 reply->result_md = md;
1368 reply->result_dpos = dpos;
1370 } else if (reply->stat.status == RPC_PROGMISMATCH) {
1371 rpcm_dissect(tl, u_int32_t *, 2 * RPCX_UNSIGNED);
1372 reply->stat.mismatch_info.low = fxdr_unsigned(u_int32_t, *tl++);
1373 reply->stat.mismatch_info.high = fxdr_unsigned(u_int32_t, *tl);
1377 error = EPROTONOSUPPORT;
1389  * Scan the rpctask list and retransmit any requests that have timed out.
1390  * To avoid retransmission attempts on STREAM sockets (in the future) make
1391  * sure to set the r_retry field to 0 (implies nm_retry == 0).
/* NOTE(review): periodic retransmit timer; function signature is not
 * visible in this excerpt — presumably rpcclnt_timer(void *arg). */
1398 struct timeout *to = (struct timeout *) arg;
1400 struct rpctask *rep;
1403 struct rpcclnt *rpc;
1408 struct thread *td = curthread;
/* Giant is taken because rpcclnt_msg() below ends up in tprintf. */
1416 mtx_lock(&Giant); /* rpc_msg -> tprintf */
1417 TAILQ_FOREACH(rep, &rpctask_q, r_chain) {
1418 rpc = rep->r_rpcclnt;
/* Skip tasks that already have a reply or were soft-terminated. */
1419 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
/* A pending signal on an interruptible mount terminates the task. */
1421 if (rpcclnt_sigintr(rpc, rep, rep->r_td)) {
1422 rep->r_flags |= R_SOFTTERM;
/* r_rtt >= 0: request is being timed; compute its timeout. */
1425 if (rep->r_rtt >= 0) {
/* Dumb timer: fixed timeout; otherwise use the smoothed RTT estimate. */
1427 if (rpc->rc_flag & RPCCLNT_DUMBTIMR)
1428 timeo = rpc->rc_timeo;
1430 timeo = RPC_RTO(rpc, rpcclnt_proct(rep->r_rpcclnt,
/* Exponential backoff scaled by the number of prior timeouts. */
1432 if (rpc->rc_timeouts > 0)
1433 timeo *= rpcclnt_backoff[rpc->rc_timeouts - 1];
1434 if (rep->r_rtt <= timeo)
1436 if (rpc->rc_timeouts < 8)
1440 		 * Check for server not responding
/* Print "not responding" once per task (R_TPRINTFMSG latches it). */
1442 if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
1443 rep->r_rexmit > rpc->rc_deadthresh) {
1444 rpcclnt_msg(rep->r_td, rpc->rc_prog->prog_name,
1446 rep->r_flags |= R_TPRINTFMSG;
/* Retry budget exhausted: count it and soft-terminate the task. */
1448 if (rep->r_rexmit >= rep->r_retry) { /* too many */
1449 rpcstats.rpctimeouts++;
1450 rep->r_flags |= R_SOFTTERM;
/* Non-datagram (stream) sockets are not retransmitted here; just
 * clamp the rexmit counter. */
1453 if (rpc->rc_sotype != SOCK_DGRAM) {
1454 if (++rep->r_rexmit > RPC_MAXREXMIT)
1455 rep->r_rexmit = RPC_MAXREXMIT;
1458 if ((so = rpc->rc_so) == NULL)
1462 		 * If there is enough space and the window allows.. Resend it
1463 		 * Set r_rtt to -1 in case we fail to send it now.
/* Resend only if socket buffer has room AND the congestion window
 * allows it (or the dumb timer / already-sent flag bypasses cwnd),
 * AND the request mbuf chain can be copied without blocking. */
1466 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
1467 ((rpc->rc_flag & RPCCLNT_DUMBTIMR) ||
1468 (rep->r_flags & R_SENT) ||
1469 rpc->rc_sent < rpc->rc_cwnd) &&
1470 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) {
/* Connected socket: no address needed; else send to rc_name. */
1471 if ((rpc->rc_flag & RPCCLNT_NOCONN) == 0)
1472 error = (*so->so_proto->pr_usrreqs->pru_send) (so, 0, m,
1475 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, rpc->rc_name, NULL, td);
1477 if (RPCIGNORE_SOERROR(rpc->rc_soflags, error))
1481 				 * Iff first send, start timing else turn
1482 				 * timing off, backoff timer and divide
1483 				 * congestion window by 2.
1485 if (rep->r_flags & R_SENT) {
/* Retransmission: stop RTT timing (Karn's rule) and back off. */
1486 rep->r_flags &= ~R_TIMING;
1487 if (++rep->r_rexmit > RPC_MAXREXMIT)
1488 rep->r_rexmit = RPC_MAXREXMIT;
1490 if (rpc->rc_cwnd < RPC_CWNDSCALE)
1491 rpc->rc_cwnd = RPC_CWNDSCALE;
1492 rpcstats.rpcretries++;
/* First send: mark sent and charge one slot against the window. */
1494 rep->r_flags |= R_SENT;
1495 rpc->rc_sent += RPC_CWNDSCALE;
1501 mtx_unlock(&Giant); /* rpc_msg -> tprintf */
/* Re-arm the timer (OpenBSD timeout vs. FreeBSD callout APIs). */
1505 timeout_add(rpcclnt_timer, to, rpcclnt_ticks);
1507 callout_reset(&rpcclnt_callout, rpcclnt_ticks, rpcclnt_timer, NULL);
1512  * Test for a termination condition pending on the process. This is used for
1513  * RPCCLNT_INT mounts.
/* Returns nonzero when the blocked RPC should give up: either the task
 * was soft-terminated, or (on an interruptible mount) a deliverable,
 * un-ignored signal is pending for the process. */
1516 rpcclnt_sigintr(rpc, task, pr)
1517 struct rpcclnt *rpc;
1518 struct rpctask *task;
1528 /* XXX deal with forced unmounts */
/* Soft-terminated task: report termination immediately. */
1530 if (task && (task->r_flags & R_SOFTTERM))
/* Non-interruptible mount: signals never interrupt the RPC. */
1533 if (!(rpc->rc_flag & RPCCLNT_INT))
1541 if (p && p->p_siglist &&
1542 (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
/* tmpset = pending signals minus thread-masked minus ignored ones. */
1548 tmpset = p->p_siglist;
1549 SIGSETNAND(tmpset, pr->td_sigmask);
/* ps_sigignore is protected by the sigacts mutex. */
1550 mtx_lock(&p->p_sigacts->ps_mtx);
1551 SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
1552 mtx_unlock(&p->p_sigacts->ps_mtx);
1553 if (SIGNOTEMPTY(p->p_siglist) && RPCCLNTINT_SIGMASK(tmpset)) {
1563  * Lock a socket against others. Necessary for STREAM sockets to ensure you
1564  * get an entire rpc request/reply and also to avoid race conditions between
1565  * the processes with nfs requests in progress when a reconnect is necessary.
1568 rpcclnt_sndlock(flagp, task)
1570 struct rpctask *task;
1573 int slpflag = 0, slptimeo = 0;
/* Interruptible mounts sleep with PCATCH so signals can wake us. */
1576 if (task->r_rpcclnt->rc_flag & RPCCLNT_INT)
/* Wait for the current holder; bail out on a pending termination. */
1578 while (*flagp & RPCCLNT_SNDLOCK) {
1579 if (rpcclnt_sigintr(task->r_rpcclnt, task, p))
1581 *flagp |= RPCCLNT_WANTSND;
1582 (void)tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcsndlck",
1584 if (slpflag == PCATCH) {
/* Lock is free: take it. */
1589 *flagp |= RPCCLNT_SNDLOCK;
1594  * Unlock the stream socket for others.
1597 rpcclnt_sndunlock(flagp)
/* Unlocking a lock we do not hold is a fatal consistency error. */
1601 if ((*flagp & RPCCLNT_SNDLOCK) == 0)
1602 panic("rpc sndunlock");
1603 *flagp &= ~RPCCLNT_SNDLOCK;
/* Wake any waiter that parked in rpcclnt_sndlock(). */
1604 if (*flagp & RPCCLNT_WANTSND) {
1605 *flagp &= ~RPCCLNT_WANTSND;
1606 wakeup((caddr_t) flagp);
/* Acquire the receive-side lock on the client's socket; mirrors
 * rpcclnt_sndlock() but keyed on RPCCLNT_RCVLOCK/RPCCLNT_WANTRCV. */
1611 rpcclnt_rcvlock(task)
1612 struct rpctask *task;
1614 int *flagp = &task->r_rpcclnt->rc_flag;
1615 int slpflag, slptimeo = 0;
/* Interruptible mount: sleep catchably so signals can wake us. */
1617 if (*flagp & RPCCLNT_INT)
1621 while (*flagp & RPCCLNT_RCVLOCK) {
/* Give up if the task was terminated or a signal is pending. */
1622 if (rpcclnt_sigintr(task->r_rpcclnt, task, task->r_td))
1624 *flagp |= RPCCLNT_WANTRCV;
1625 (void)tsleep((caddr_t) flagp, slpflag | (PZERO - 1), "rpcrcvlk",
1627 if (slpflag == PCATCH) {
/* Lock is free: take it. */
1632 *flagp |= RPCCLNT_RCVLOCK;
1637  * Unlock the stream socket for others.
1640 rpcclnt_rcvunlock(flagp)
/* Unlocking a lock we do not hold is a fatal consistency error. */
1644 if ((*flagp & RPCCLNT_RCVLOCK) == 0)
1645 panic("nfs rcvunlock");
1646 *flagp &= ~RPCCLNT_RCVLOCK;
/* Wake any waiter that parked in rpcclnt_rcvlock(). */
1647 if (*flagp & RPCCLNT_WANTRCV) {
1648 *flagp &= ~RPCCLNT_WANTRCV;
1649 wakeup((caddr_t) flagp);
1655  * Check for badly aligned mbuf data areas and realign data in an mbuf list
1656  * by copying the data areas up, as required.
1659 rpcclnt_realign(m, hsiz)
1664 int siz, mlen, olen;
1670 	 * This never happens for UDP, rarely happens for TCP but
1671 	 * frequently happens for iso transport.
/* Realign only when the length or the data pointer is not 4-byte
 * aligned; XDR decoding reads 32-bit words in place. */
1673 if ((m->m_len & 0x3) || (mtod(m, long)&0x3)) {
1675 fcp = mtod(m, caddr_t);
1676 if ((long)fcp & 0x3) {
/* Reset m_data to an aligned offset; drop the pkthdr (and its
 * tags) since this mbuf no longer heads a record. */
1677 if (m->m_flags & M_PKTHDR)
1678 m_tag_delete_chain(m, NULL);
1679 m->m_flags &= ~M_PKTHDR;
/* Clusters realign within ext_buf; plain mbufs use m_dat. */
1680 if (m->m_flags & M_EXT)
1681 m->m_data = m->m_ext.ext_buf +
1682 ((m->m_ext.ext_size - olen) & ~0x3);
1684 m->m_data = m->m_dat;
1687 tcp = mtod(m, caddr_t);
1692 		 * If possible, only put the first invariant part of
1693 		 * the RPC header in the first mbuf.
1695 mlen = M_TRAILINGSPACE(m);
1696 if (olen <= hsiz && mlen > hsiz)
1699 /* Loop through the mbuf list consolidating data. */
/* Each consolidated mbuf likewise loses pkthdr status/tags and is
 * rewound to the start of its storage. */
1703 if (m2->m_flags & M_PKTHDR)
1704 m_tag_delete_chain(m2, NULL);
1705 m2->m_flags &= ~M_PKTHDR;
1706 if (m2->m_flags & M_EXT)
1707 m2->m_data = m2->m_ext.ext_buf;
1709 m2->m_data = m2->m_dat;
1711 mlen = M_TRAILINGSPACE(m2);
1712 tcp = mtod(m2, caddr_t);
/* Copy as much of the remaining data as fits in this mbuf. */
1716 siz = min(mlen, olen);
1718 bcopy(fcp, tcp, siz);
1728 fcp = mtod(m, caddr_t);
1733 	 * Finally, set m_len == 0 for any trailing mbufs
1734 	 * that have been copied out of.
/* Alternate (FreeBSD-style) realign: instead of copying in place,
 * allocate fresh aligned mbufs and splice them into the chain.
 * NOTE(review): hsiz appears unused in the visible lines — confirm
 * against the full function. */
1747 rpcclnt_realign(struct mbuf **pm, int hsiz)
1750 struct mbuf *n = NULL;
1753 RPCDEBUG("in rpcclnt_realign()");
/* Walk the chain; the first misaligned mbuf triggers replacement. */
1755 while ((m = *pm) != NULL) {
1756 if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
/* Allocate an aligned mbuf, with a cluster for large payloads. */
1757 MGET(n, M_TRYWAIT, MT_DATA);
1758 if (m->m_len >= MINCLSIZE) {
1759 MCLGET(n, M_TRYWAIT);
1768 	 * If n is non-NULL, loop on m copying data, then replace the
1769 	 * portion of the chain that had to be realigned.
1773 m_copyback(n, off, m->m_len, mtod(m, caddr_t));
1781 RPCDEBUG("leave rpcclnt_realign()");
/* Report an RPC server condition to the user's terminal (and log)
 * via tprintf; two variants for the OpenBSD and FreeBSD tprintf APIs. */
1786 rpcclnt_msg(p, server, msg)
1793 struct proc *pr = p;
1796 tpr = tprintf_open(p);
1799 tprintf(tpr, "rpc server %s: %s\n", server, msg);
/* NOTE(review): here p is dereferenced as p->td_proc, i.e. a thread
 * pointer on this branch — confirm the parameter type per-platform. */
1805 tprintf(p ? p->td_proc : NULL, LOG_INFO,
1806 "nfs server %s: %s\n", server, msg);
1812  * Build the RPC header and fill in the authorization info. The authorization
1813  * string argument is only used when the credentials come from outside of the
1814  * kernel (AUTH_KERB). (likewise, the ucred is only used when inside the
1815  * kernel) Returns the head of the mbuf list.
1817 static struct mbuf *
1818 rpcclnt_buildheader(rc, procid, mrest, mrest_len, xidp, mheadend, cred)
1822 u_int32_t mrest_len;
1824 struct mbuf **mheadend;
1825 struct ucred * cred;
1827 /* register */ struct mbuf *mb;
1828 register u_int32_t *tl;
1829 /* register */ caddr_t bpos;
1830 struct mbuf *mreq, *mb2;
/* Get a header mbuf; use a cluster if six XDR words would not fit,
 * otherwise align the data pointer for the fixed-size header. */
1833 MGETHDR(mb, M_TRYWAIT, MT_DATA);
1834 if (6 * RPCX_UNSIGNED >= MINCLSIZE) {
1835 MCLGET(mb, M_TRYWAIT);
1836 } else if (6 * RPCX_UNSIGNED < MHLEN) {
1837 MH_ALIGN(mb, 6 * RPCX_UNSIGNED);
1839 RPCDEBUG("mbuf too small");
1840 panic("cheap bailout");
1844 bpos = mtod(mb, caddr_t);
1847 	 * First the RPC header.
/* Reserve space for the six fixed header words (xid, direction,
 * version, program, program version, procedure). */
1849 rpcm_build(tl, u_int32_t *, 6 * RPCX_UNSIGNED);
1851 /* Get a new (non-zero) xid */
/* Seed the xid counter once from arc4random(). */
1852 if ((rpcclnt_xid == 0) && (rpcclnt_xid_touched == 0)) {
1853 rpcclnt_xid = arc4random();
1854 rpcclnt_xid_touched = 1;
/* Advance by a random non-zero increment (1..255) per request. */
1856 while ((*xidp = arc4random() % 256) == 0);
1857 rpcclnt_xid += *xidp;
1861 *tl++ = *xidp = txdr_unsigned(rpcclnt_xid);
1865 *tl++ = txdr_unsigned(rc->rc_prog->prog_id);
1866 *tl++ = txdr_unsigned(rc->rc_prog->prog_version);
1867 *tl++ = txdr_unsigned(procid);
/* Append the credential + verifier built from rc_auth/cred. */
1869 if ((error = rpcauth_buildheader(rc->rc_auth, cred, &mb, &bpos))) {
1870 RPCDEBUG("rpcauth_buildheader failed %d", error);
/* Finalize the packet header: total length and no receive ifp. */
1876 mreq->m_pkthdr.len = m_length(mreq, NULL);
1877 mreq->m_pkthdr.rcvif = NULL;
1882  * Help break down an mbuf chain by setting the first siz bytes contiguous
1883  * pointed to by returned val. This is used by the macros rpcm_dissect and
1884  * rpcm_dissecton for tough cases. (The macros use the vars. dpos and dpos2)
1887 rpcm_disct(mdp, dposp, siz, left, cp2)
1894 struct mbuf *mp, *mp2;
/* Current mbuf exhausted: step to the next one in the chain. */
1900 *mdp = mp = mp->m_next;
1902 RPC_RETURN(EBADRPC);
1904 *dposp = mtod(mp, caddr_t);
/* Requested span crosses mbufs and cannot be satisfied in place. */
1909 } else if (mp->m_next == NULL) {
1910 RPC_RETURN(EBADRPC);
/* Spans larger than one mbuf's storage are a caller bug. */
1911 } else if (siz > MHLEN) {
1912 panic("rpc S too big");
/* Build a fresh mbuf holding the siz bytes contiguously: copy the
 * tail of the current mbuf, then pull the remainder from successors. */
1914 MGET(mp2, M_TRYWAIT, MT_DATA);
1915 mp2->m_next = mp->m_next;
1919 *cp2 = p = mtod(mp, caddr_t);
1920 bcopy(*dposp, p, left); /* Copy what was left */
1924 /* Loop around copying up the siz2 bytes */
/* Chain ran out before siz bytes were gathered: bad RPC record. */
1927 RPC_RETURN(EBADRPC);
1928 xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
1930 bcopy(mtod(mp2, caddr_t), p, xfer);
1941 *dposp = mtod(mp2, caddr_t);
/* Look up the timing class for procedure procid in the client's
 * procedure table (used by the RTT-based timeout in rpcclnt_timer);
 * guarded against a missing table and out-of-range procid. */
1949 rpcclnt_proct(rpc, procid)
1950 struct rpcclnt *rpc;
1953 if (rpc->rc_proctlen != 0 && rpc->rc_proct != NULL &&
1954 procid < rpc->rc_proctlen) {
1955 return rpc->rc_proct[procid];
/* Advance the dissect position (*mdp/*dposp) by offs bytes, walking
 * to later mbufs as needed; EBADRPC when the chain is too short. */
1961 rpc_adv(mdp, dposp, offs, left)
1976 RPC_RETURN(EBADRPC);
1980 *dposp = mtod(m, caddr_t) + offs;
/* Soft-terminate every outstanding task belonging to this client,
 * then wait (bounded) for them to drain off the global task queue. */
1985 rpcclnt_cancelreqs(rpc)
1986 struct rpcclnt *rpc;
1988 struct rpctask *task;
/* Pass 1: mark all of this client's live tasks for termination. */
1992 TAILQ_FOREACH(task, &rpctask_q, r_chain) {
1993 if (rpc != task->r_rpcclnt || task->r_mrep != NULL ||
1994 (task->r_flags & R_SOFTTERM))
1996 rpcclnt_softterm(task);
/* Pass 2: poll up to 30 times for the tasks to disappear, sleeping
 * on lbolt (about one second per iteration) between scans. */
2000 for (i = 0; i < 30; i++) {
2002 TAILQ_FOREACH(task, &rpctask_q, r_chain) {
2003 if (rpc == task->r_rpcclnt)
2009 tsleep(&lbolt, PSOCK, "nfscancel", 0);
/* Mark a task as soft-terminated and, if it had been sent, release
 * its congestion-window slot (mirrors the R_SENT accounting done by
 * the send and timer paths). */
2015 rpcclnt_softterm(struct rpctask * task)
2017 task->r_flags |= R_SOFTTERM;
2018 if (task->r_flags & R_SENT) {
2019 task->r_rpcclnt->rc_sent -= RPC_CWNDSCALE;
2020 task->r_flags &= ~R_SENT;
2026 /* called by rpcclnt_get() */
/* Allocate a zeroed rpcclnt structure; M_WAITOK means this cannot
 * fail (it sleeps until memory is available). */
2028 rpcclnt_create(struct rpcclnt ** rpc)
2030 MALLOC(*rpc, struct rpcclnt *, sizeof(struct rpcclnt), M_RPC, M_WAITOK | M_ZERO);
2033 /* called by rpcclnt_put() */
/* Free an rpcclnt structure; a NULL argument is only diagnosed, not
 * dereferenced. */
2035 rpcclnt_destroy(struct rpcclnt * rpc)
2040 RPCDEBUG("attempting to free a NULL rpcclnt (not dereferenced)");
2043 #endif /* !__OpenBSD__ */
2046 /* XXX: add a lock around the auth structure in struct rpcclnt and make this
2047  * call safe for calling during a connection */
2049 rpcauth_buildheader(struct rpc_auth * auth, struct ucred * cred, struct mbuf ** mhdr, caddr_t * bp)
2051 size_t authsiz, verfsiz;
2052 uint32_t mlen, grpsiz;
2053 register struct mbuf *mb, *mb2;
2055 register u_int32_t *tl;
2058 if (auth == NULL || mhdr == NULL)
2061 switch (auth->auth_type) {
2067 authsiz = (5 + cred->cr_ngroups) * RPCX_UNSIGNED;
2071 return EPROTONOSUPPORT;
2075 mlen = rpcm_rndup(authsiz) + rpcm_rndup(verfsiz) + 4 * RPCX_UNSIGNED;
2080 rpcm_build(tl, u_int32_t *, mlen);
2085 *tl++ = txdr_unsigned(auth->auth_type);
2086 *tl++ = txdr_unsigned(authsiz);
2087 switch (auth->auth_type) {
2092 *tl++ = txdr_unsigned(cred->cr_uid);
2093 *tl++ = txdr_unsigned(cred->cr_groups[0]);
2094 grpsiz = cred->cr_ngroups;
2095 *tl++ = txdr_unsigned(grpsiz);
2096 /* XXX: groups[0] is already sent... */
2097 for (i = 0 ; i < grpsiz ; i++) {
2098 *tl++ = txdr_unsigned(cred->cr_groups[i]);
2101 /* null verification header */
2102 *tl++ = txdr_unsigned(RPCAUTH_NULL);
2106 /* just a null verf header */
2107 *tl++ = txdr_unsigned(RPCAUTH_NULL);
2111 panic("inconsistent rpc auth type");