1 /* $NetBSD: svc_vc.c,v 1.7 2000/08/03 00:01:53 fvdl Exp $ */
4 * Copyright (c) 2009, Sun Microsystems, Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 * - Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 * - Neither the name of Sun Microsystems, Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived
16 * from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
31 #if defined(LIBC_SCCS) && !defined(lint)
32 static char *sccsid2 = "@(#)svc_tcp.c 1.21 87/08/11 Copyr 1984 Sun Micro";
33 static char *sccsid = "@(#)svc_tcp.c 2.2 88/08/01 4.0 RPCSRC";
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
39 * svc_vc.c, Server side for Connection Oriented based RPC.
41 * Actually implements two flavors of transporter -
42 * a tcp rendezvouser (a listener and connection establisher)
43 * and a record/tcp stream.
46 #include <sys/param.h>
48 #include <sys/kernel.h>
49 #include <sys/malloc.h>
51 #include <sys/mutex.h>
53 #include <sys/protosw.h>
54 #include <sys/queue.h>
55 #include <sys/socket.h>
56 #include <sys/socketvar.h>
58 #include <sys/systm.h>
63 #include <netinet/tcp.h>
68 #include <rpc/rpc_com.h>
70 #include <security/mac/mac_framework.h>
/*
 * Prototypes for the file-local transport methods, grouped by flavour:
 * rendezvous (listening socket), connected stream, and backchannel,
 * followed by the connection-setup helpers and the socket upcall.
 */
72 static bool_t svc_vc_rendezvous_recv(SVCXPRT *, struct rpc_msg *,
73 struct sockaddr **, struct mbuf **);
74 static enum xprt_stat svc_vc_rendezvous_stat(SVCXPRT *);
75 static void svc_vc_rendezvous_destroy(SVCXPRT *);
76 static bool_t svc_vc_null(void);
77 static void svc_vc_destroy(SVCXPRT *);
78 static enum xprt_stat svc_vc_stat(SVCXPRT *);
79 static bool_t svc_vc_recv(SVCXPRT *, struct rpc_msg *,
80 struct sockaddr **, struct mbuf **);
81 static bool_t svc_vc_reply(SVCXPRT *, struct rpc_msg *,
82 struct sockaddr *, struct mbuf *);
83 static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
84 static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
86 static void svc_vc_backchannel_destroy(SVCXPRT *);
87 static enum xprt_stat svc_vc_backchannel_stat(SVCXPRT *);
88 static bool_t svc_vc_backchannel_recv(SVCXPRT *, struct rpc_msg *,
89 struct sockaddr **, struct mbuf **);
90 static bool_t svc_vc_backchannel_reply(SVCXPRT *, struct rpc_msg *,
91 struct sockaddr *, struct mbuf *);
92 static bool_t svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq,
94 static SVCXPRT *svc_vc_create_conn(SVCPOOL *pool, struct socket *so,
95 struct sockaddr *raddr);
96 static int svc_vc_accept(struct socket *head, struct socket **sop);
97 static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag);
/*
 * Method table for the rendezvous transport; svc_vc_create() installs it
 * on the transport it builds around the listening socket. The xp_reply
 * slot holds svc_vc_null cast to the reply signature.
 */
99 static struct xp_ops svc_vc_rendezvous_ops = {
100 .xp_recv = svc_vc_rendezvous_recv,
101 .xp_stat = svc_vc_rendezvous_stat,
102 .xp_reply = (bool_t (*)(SVCXPRT *, struct rpc_msg *,
103 struct sockaddr *, struct mbuf *))svc_vc_null,
104 .xp_destroy = svc_vc_rendezvous_destroy,
105 .xp_control = svc_vc_rendezvous_control
/*
 * Method table for a connected stream transport; installed by
 * svc_vc_create_conn().
 */
108 static struct xp_ops svc_vc_ops = {
109 .xp_recv = svc_vc_recv,
110 .xp_stat = svc_vc_stat,
111 .xp_reply = svc_vc_reply,
112 .xp_destroy = svc_vc_destroy,
113 .xp_control = svc_vc_control
/*
 * Method table for a backchannel transport; installed by
 * svc_vc_create_backchannel().
 */
116 static struct xp_ops svc_vc_backchannel_ops = {
117 .xp_recv = svc_vc_backchannel_recv,
118 .xp_stat = svc_vc_backchannel_stat,
119 .xp_reply = svc_vc_backchannel_reply,
120 .xp_destroy = svc_vc_backchannel_destroy,
121 .xp_control = svc_vc_backchannel_control
126 * xprt = svc_vc_create(sock, send_buf_size, recv_buf_size);
128 * Creates, registers, and returns a (rpc) tcp based transporter.
129 * Once *xprt is initialized, it is registered as a transporter
130 * see (svc.h, xprt_register). This routine returns
131 * a NULL if a problem occurred.
133 * The filedescriptor passed in is expected to refer to a bound, but
134 * not yet connected socket.
136 * Since streams do buffered io similar to stdio, the caller can specify
137 * how big the send and receive buffers are via the second and third parms;
138 * 0 => use the system default.
/*
 * Outline: if the socket state has SS_ISCONNECTED or SS_ISDISCONNECTED
 * set, the peer address is looked up and the socket is handed to
 * svc_vc_create_conn(). Otherwise a transport is allocated with the
 * rendezvous method table, the local address is copied into xp_ltaddr,
 * the socket is put into the listen state and svc_vc_soupcall is
 * installed as the receive upcall.
 */
141 svc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize,
149 if (so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED)) {
151 error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
154 xprt = svc_vc_create_conn(pool, so, sa);
160 xprt = svc_xprt_alloc();
161 sx_init(&xprt->xp_lock, "xprt->xp_lock");
162 xprt->xp_pool = pool;
163 xprt->xp_socket = so;
166 xprt->xp_ops = &svc_vc_rendezvous_ops;
168 error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
170 goto cleanup_svc_vc_create;
173 memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
/* NOTE(review): the result of solisten() is not examined here. */
178 solisten(so, SOMAXCONN, curthread);
/* xp_upcallset and the upcall change together under the receive sockbuf lock. */
180 SOCKBUF_LOCK(&so->so_rcv);
181 xprt->xp_upcallset = 1;
182 soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
183 SOCKBUF_UNLOCK(&so->so_rcv);
186 cleanup_svc_vc_create:
193 * Create a new transport for a socket obtained via soaccept().
/*
 * Builds a stream transport around the given socket:
 *  - sets SO_KEEPALIVE, and TCP_NODELAY when the protocol is TCP;
 *  - allocates the per-connection cf_conn, initially XPRT_IDLE;
 *  - allocates the transport, binds it to the svc_vc_ops method table
 *    and gives it an idle timeout of 6 * 60;
 *  - copies the remote address (raddr) into xp_rtaddr and the local
 *    address into xp_ltaddr;
 *  - installs svc_vc_soupcall as the receive upcall.
 * The cleanup label releases the transport and the cf_conn.
 */
196 svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr)
198 SVCXPRT *xprt = NULL;
199 struct cf_conn *cd = NULL;
200 struct sockaddr* sa = NULL;
205 bzero(&opt, sizeof(struct sockopt));
206 opt.sopt_dir = SOPT_SET;
207 opt.sopt_level = SOL_SOCKET;
208 opt.sopt_name = SO_KEEPALIVE;
210 opt.sopt_valsize = sizeof(one);
211 error = sosetopt(so, &opt);
216 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
217 bzero(&opt, sizeof(struct sockopt));
218 opt.sopt_dir = SOPT_SET;
219 opt.sopt_level = IPPROTO_TCP;
220 opt.sopt_name = TCP_NODELAY;
222 opt.sopt_valsize = sizeof(one);
223 error = sosetopt(so, &opt);
229 cd = mem_alloc(sizeof(*cd));
230 cd->strm_stat = XPRT_IDLE;
232 xprt = svc_xprt_alloc();
233 sx_init(&xprt->xp_lock, "xprt->xp_lock");
234 xprt->xp_pool = pool;
235 xprt->xp_socket = so;
238 xprt->xp_ops = &svc_vc_ops;
241 * See http://www.connectathon.org/talks96/nfstcp.pdf - client
242 * has a 5 minute timer, server has a 6 minute timer.
244 xprt->xp_idletimeout = 6 * 60;
246 memcpy(&xprt->xp_rtaddr, raddr, raddr->sa_len);
248 error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
250 goto cleanup_svc_vc_create;
252 memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
/* xp_upcallset and the upcall change together under the receive sockbuf lock. */
257 SOCKBUF_LOCK(&so->so_rcv);
258 xprt->xp_upcallset = 1;
259 soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
260 SOCKBUF_UNLOCK(&so->so_rcv);
263 * Throw the transport into the active list in case it already
264 * has some data buffered.
266 sx_xlock(&xprt->xp_lock);
268 sx_xunlock(&xprt->xp_lock);
271 cleanup_svc_vc_create:
/*
 * NOTE(review): xprt comes from svc_xprt_alloc() and has had sx_init()
 * applied to xp_lock, yet it is released with a plain mem_free() here.
 * Confirm that this matches the allocator and that the lock does not
 * need sx_destroy() first.
 */
273 mem_free(xprt, sizeof(*xprt));
276 mem_free(cd, sizeof(*cd));
281 * Create a new transport for a backchannel on a clnt_vc socket.
/*
 * Allocates a cf_conn (initially XPRT_IDLE) and a transport bound to the
 * backchannel method table. No socket is attached at this point:
 * xp_socket is set to NULL.
 */
284 svc_vc_create_backchannel(SVCPOOL *pool)
286 SVCXPRT *xprt = NULL;
287 struct cf_conn *cd = NULL;
289 cd = mem_alloc(sizeof(*cd));
290 cd->strm_stat = XPRT_IDLE;
292 xprt = svc_xprt_alloc();
293 sx_init(&xprt->xp_lock, "xprt->xp_lock");
294 xprt->xp_pool = pool;
295 xprt->xp_socket = NULL;
298 xprt->xp_ops = &svc_vc_backchannel_ops;
303 * This does all of the accept except the final call to soaccept. The
304 * caller will call soaccept after dropping its locks (soaccept may
/*
 * Steps: test that head has SO_ACCEPTCONN set, run the MAC accept check
 * with the credentials of the current thread, and test whether the
 * completed-connection queue (so_comp) is empty. Otherwise the first
 * completed socket is referenced, unlinked from so_comp, inherits
 * SS_NBIO from head and has SQ_COMP cleared. The caller in this file
 * treats an EWOULDBLOCK result as the queue having been drained.
 */
308 svc_vc_accept(struct socket *head, struct socket **sop)
313 if ((head->so_options & SO_ACCEPTCONN) == 0) {
318 error = mac_socket_check_accept(curthread->td_ucred, head);
323 if (TAILQ_EMPTY(&head->so_comp)) {
328 so = TAILQ_FIRST(&head->so_comp);
329 KASSERT(!(so->so_qstate & SQ_INCOMP), ("svc_vc_accept: so SQ_INCOMP"));
330 KASSERT(so->so_qstate & SQ_COMP, ("svc_vc_accept: so not SQ_COMP"));
333 * Before changing the flags on the socket, we have to bump the
334 * reference count. Otherwise, if the protocol calls sofree(),
335 * the socket will be released due to a zero refcount.
336 * XXX might not need soref() since this is simpler than kern_accept.
338 SOCK_LOCK(so); /* soref() and so_state update */
339 soref(so); /* file descriptor reference */
341 TAILQ_REMOVE(&head->so_comp, so, so_list);
343 so->so_state |= (head->so_state & SS_NBIO);
344 so->so_qstate &= ~SQ_COMP;
352 /* connection has been removed from the listen queue */
353 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
360 svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg,
361 struct sockaddr **addrp, struct mbuf **mp)
363 struct socket *so = NULL;
364 struct sockaddr *sa = NULL;
369 * The socket upcall calls xprt_active() which will eventually
370 * cause the server to call us here. We attempt to accept a
371 * connection from the socket and turn it into a new
372 * transport. If the accept fails, we have drained all pending
373 * connections so we call xprt_inactive().
375 sx_xlock(&xprt->xp_lock);
377 error = svc_vc_accept(xprt->xp_socket, &so);
379 if (error == EWOULDBLOCK) {
381 * We must re-test for new connections after taking
382 * the lock to protect us in the case where a new
383 * connection arrives after our call to accept fails
384 * with EWOULDBLOCK. The pool lock protects us from
385 * racing the upcall after our TAILQ_EMPTY() call
389 mtx_lock(&xprt->xp_pool->sp_lock);
390 if (TAILQ_EMPTY(&xprt->xp_socket->so_comp))
391 xprt_inactive_locked(xprt);
392 mtx_unlock(&xprt->xp_pool->sp_lock);
394 sx_xunlock(&xprt->xp_lock);
399 SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
400 if (xprt->xp_upcallset) {
401 xprt->xp_upcallset = 0;
402 soupcall_clear(xprt->xp_socket, SO_RCV);
404 SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
406 sx_xunlock(&xprt->xp_lock);
410 sx_xunlock(&xprt->xp_lock);
413 error = soaccept(so, &sa);
417 * XXX not sure if I need to call sofree or soclose here.
425 * svc_vc_create_conn will call xprt_register - we don't need
426 * to do anything with the new connection except dereference it.
428 new_xprt = svc_vc_create_conn(xprt->xp_pool, so, sa);
432 SVC_RELEASE(new_xprt);
437 return (FALSE); /* there is never an rpc msg to be processed */
441 static enum xprt_stat
442 svc_vc_rendezvous_stat(SVCXPRT *xprt)
/*
 * Teardown shared by svc_vc_rendezvous_destroy() and svc_vc_destroy():
 * clears the receive upcall if one is still set (under the receive
 * sockbuf lock), destroys xp_lock, closes the socket and releases
 * xp_netid, sized as its string length plus the terminator.
 */
449 svc_vc_destroy_common(SVCXPRT *xprt)
451 SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
452 if (xprt->xp_upcallset) {
453 xprt->xp_upcallset = 0;
454 soupcall_clear(xprt->xp_socket, SO_RCV);
456 SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
458 sx_destroy(&xprt->xp_lock);
460 (void)soclose(xprt->xp_socket);
463 (void) mem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1);
/*
 * Destroy method of the rendezvous transport; performs the shared
 * teardown through svc_vc_destroy_common().
 */
468 svc_vc_rendezvous_destroy(SVCXPRT *xprt)
471 svc_vc_destroy_common(xprt);
/*
 * Destroy method of a stream transport. The cf_conn pointer is read from
 * xp_p1 before the shared teardown runs; afterwards the pending mbuf
 * chain (cd->mpending) and the cf_conn itself are released.
 */
475 svc_vc_destroy(SVCXPRT *xprt)
477 struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
479 svc_vc_destroy_common(xprt);
484 m_freem(cd->mpending);
485 mem_free(cd, sizeof(*cd));
/*
 * Destroy method of a backchannel transport; releases the cf_conn kept
 * in xp_p1.
 */
489 svc_vc_backchannel_destroy(SVCXPRT *xprt)
491 struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
501 mem_free(cd, sizeof(*cd));
506 svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in)
512 svc_vc_rendezvous_control(SVCXPRT *xprt, const u_int rq, void *in)
519 svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq, void *in)
/*
 * Status method of a stream transport. The cf_conn from xp_p1 is checked
 * for XPRT_DIED first. XPRT_MOREREQS is reported when buffered data is
 * enough to continue (at least sizeof(uint32_t) bytes, counted by walking
 * the mbuf chain under xp_lock) or when the socket is readable.
 */
525 static enum xprt_stat
526 svc_vc_stat(SVCXPRT *xprt)
532 cd = (struct cf_conn *)(xprt->xp_p1);
534 if (cd->strm_stat == XPRT_DIED)
538 * Return XPRT_MOREREQS if we have buffered data and we are
539 * mid-record or if we have enough data for a record
540 * marker. Since this is only a hint, we read mpending and
541 * resid outside the lock. We do need to take the lock if we
542 * have to traverse the mbuf chain.
546 return (XPRT_MOREREQS);
548 sx_xlock(&xprt->xp_lock);
550 while (m && n < sizeof(uint32_t)) {
554 sx_xunlock(&xprt->xp_lock);
555 if (n >= sizeof(uint32_t))
556 return (XPRT_MOREREQS);
559 if (soreadable(xprt->xp_socket))
560 return (XPRT_MOREREQS);
/*
 * Status method of a backchannel transport: reports XPRT_MOREREQS while
 * the cf_conn from xp_p1 still has a request queued in cd->mreq.
 */
565 static enum xprt_stat
566 svc_vc_backchannel_stat(SVCXPRT *xprt)
570 cd = (struct cf_conn *)(xprt->xp_p1);
572 if (cd->mreq != NULL)
573 return (XPRT_MOREREQS);
/*
 * Receive method of a stream transport. The parsing state lives in the
 * cf_conn taken from xp_p1 and is handled under xp_lock.
 *
 * Data buffered in cd->mpending is cut into record fragments. Each
 * fragment starts with a 32 bit marker that is converted with ntohl():
 * the top bit is stored in cd->eor and the low 31 bits in cd->resid.
 * Fragment data is moved from cd->mpending to the tail of cd->mreq and
 * cd->resid is decreased by the length of each mbuf moved.
 *
 * A finished record is decoded with xdr_callmsg() after xp_lock has been
 * dropped, and the mbufs obtained from xdrmbuf_getall() are returned
 * through *mp. When the buffered data is not sufficient, more is fetched
 * with a MSG_DONTWAIT soreceive() and linked onto the tail of
 * cd->mpending. On EWOULDBLOCK the transport is made inactive if the
 * socket is still not readable; a later path clears the upcall and sets
 * strm_stat to XPRT_DIED, and the EOF path also sets XPRT_DIED.
 */
579 svc_vc_recv(SVCXPRT *xprt, struct rpc_msg *msg,
580 struct sockaddr **addrp, struct mbuf **mp)
582 struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
589 * Serialise access to the socket and our own record parsing
592 sx_xlock(&xprt->xp_lock);
596 * If we have an mbuf chain in cd->mpending, try to parse a
597 * record from it, leaving the result in cd->mreq. If we don't
598 * have a complete record, leave the partial result in
599 * cd->mreq and try to read more from the socket.
603 * If cd->resid is non-zero, we have part of the
604 * record already, otherwise we are expecting a record
609 * See if there is enough data buffered to
610 * make up a record marker. Make sure we can
611 * handle the case where the record marker is
612 * split across more than one mbuf.
618 while (n < sizeof(uint32_t) && m) {
622 if (n < sizeof(uint32_t))
624 m_copydata(cd->mpending, 0, sizeof(header),
626 header = ntohl(header);
627 cd->eor = (header & 0x80000000) != 0;
628 cd->resid = header & 0x7fffffff;
629 m_adj(cd->mpending, sizeof(uint32_t));
633 * Start pulling off mbufs from cd->mpending
634 * until we either have a complete record or
635 * we run out of data. We use m_split to pull
636 * data - it will pull as much as possible and
637 * split the last mbuf if necessary.
639 while (cd->mpending && cd->resid) {
641 if (cd->mpending->m_next
642 || cd->mpending->m_len > cd->resid)
643 cd->mpending = m_split(cd->mpending,
644 cd->resid, M_WAITOK);
648 m_last(cd->mreq)->m_next = m;
652 cd->resid -= m->m_len;
658 * If cd->resid is zero now, we have managed to
659 * receive a record fragment from the stream. Check
660 * for the end-of-record mark to see if we need more.
662 if (cd->resid == 0) {
667 * Success - we have a complete record in
670 xdrmbuf_create(&xdrs, cd->mreq, XDR_DECODE);
672 sx_xunlock(&xprt->xp_lock);
674 if (! xdr_callmsg(&xdrs, msg)) {
680 *mp = xdrmbuf_getall(&xdrs);
689 * The socket upcall calls xprt_active() which will eventually
690 * cause the server to call us here. We attempt to
691 * read as much as possible from the socket and put
692 * the result in cd->mpending. If the read fails,
693 * we have drained both cd->mpending and the socket so
694 * we can call xprt_inactive().
696 uio.uio_resid = 1000000000;
697 uio.uio_td = curthread;
699 rcvflag = MSG_DONTWAIT;
700 error = soreceive(xprt->xp_socket, NULL, &uio, &m, NULL,
703 if (error == EWOULDBLOCK) {
705 * We must re-test for readability after
706 * taking the lock to protect us in the case
707 * where a new packet arrives on the socket
708 * after our call to soreceive fails with
709 * EWOULDBLOCK. The pool lock protects us from
710 * racing the upcall after our soreadable()
711 * call returns false.
713 mtx_lock(&xprt->xp_pool->sp_lock);
714 if (!soreadable(xprt->xp_socket))
715 xprt_inactive_locked(xprt);
716 mtx_unlock(&xprt->xp_pool->sp_lock);
717 sx_xunlock(&xprt->xp_lock);
722 SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
723 if (xprt->xp_upcallset) {
724 xprt->xp_upcallset = 0;
725 soupcall_clear(xprt->xp_socket, SO_RCV);
727 SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
729 cd->strm_stat = XPRT_DIED;
730 sx_xunlock(&xprt->xp_lock);
736 * EOF - the other end has closed the socket.
739 cd->strm_stat = XPRT_DIED;
740 sx_xunlock(&xprt->xp_lock);
745 m_last(cd->mpending)->m_next = m;
/*
 * Receive method of a backchannel transport. Takes xp_lock, then the
 * ct_lock of the ct_data kept in xp_p2. The request queue head cd->mreq
 * is advanced to m->m_nextpkt (m is presumably the previous head; the
 * assignment is not among the lines shown here), both locks are dropped
 * and m is decoded with xdr_callmsg(). The mbufs obtained from
 * xdrmbuf_getall() are returned through *mp. The earlier unlock
 * sequences belong to early-exit paths.
 */
752 svc_vc_backchannel_recv(SVCXPRT *xprt, struct rpc_msg *msg,
753 struct sockaddr **addrp, struct mbuf **mp)
755 struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
760 sx_xlock(&xprt->xp_lock);
761 ct = (struct ct_data *)xprt->xp_p2;
763 sx_xunlock(&xprt->xp_lock);
766 mtx_lock(&ct->ct_lock);
770 mtx_unlock(&ct->ct_lock);
771 sx_xunlock(&xprt->xp_lock);
774 cd->mreq = m->m_nextpkt;
775 mtx_unlock(&ct->ct_lock);
776 sx_xunlock(&xprt->xp_lock);
778 xdrmbuf_create(&xdrs, m, XDR_DECODE);
779 if (! xdr_callmsg(&xdrs, msg)) {
784 *mp = xdrmbuf_getall(&xdrs);
/*
 * Reply method of a stream transport. The reply is encoded into a fresh
 * mbuf whose data pointer is advanced by sizeof(uint32_t) so that the
 * record mark can be prepended later. For an accepted reply with status
 * SUCCESS the message is encoded and the result chain m is appended to
 * the XDR stream; otherwise xdr_replymsg() alone produces the reply.
 * The prepended mark is the packet length minus the mark itself, with
 * the top bit set (the same bit svc_vc_recv() stores in cd->eor), in
 * network byte order. The chain is then sent on xp_socket with sosend().
 */
790 svc_vc_reply(SVCXPRT *xprt, struct rpc_msg *msg,
791 struct sockaddr *addr, struct mbuf *m)
799 * Leave space for record mark.
801 mrep = m_gethdr(M_WAITOK, MT_DATA);
802 mrep->m_data += sizeof(uint32_t);
804 xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
806 if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
807 msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
808 if (!xdr_replymsg(&xdrs, msg))
811 xdrmbuf_append(&xdrs, m);
813 stat = xdr_replymsg(&xdrs, msg);
820 * Prepend a record marker containing the reply length.
822 M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
823 *mtod(mrep, uint32_t *) =
824 htonl(0x80000000 | (mrep->m_pkthdr.len
825 - sizeof(uint32_t)));
826 error = sosend(xprt->xp_socket, NULL, NULL, mrep, NULL,
/*
 * Reply method of a backchannel transport. Encoding and record marking
 * follow the same steps as svc_vc_reply(): space is reserved for the
 * mark, the reply is encoded (with the result chain m appended for an
 * accepted SUCCESS reply) and a length word with the top bit set is
 * prepended in network byte order. The difference is the send: it goes
 * through ct->ct_socket, where ct is the ct_data kept in xp_p2, and it
 * is issued while xp_lock is held.
 */
842 svc_vc_backchannel_reply(SVCXPRT *xprt, struct rpc_msg *msg,
843 struct sockaddr *addr, struct mbuf *m)
852 * Leave space for record mark.
854 mrep = m_gethdr(M_WAITOK, MT_DATA);
855 mrep->m_data += sizeof(uint32_t);
857 xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
859 if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
860 msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
861 if (!xdr_replymsg(&xdrs, msg))
864 xdrmbuf_append(&xdrs, m);
866 stat = xdr_replymsg(&xdrs, msg);
873 * Prepend a record marker containing the reply length.
875 M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
876 *mtod(mrep, uint32_t *) =
877 htonl(0x80000000 | (mrep->m_pkthdr.len
878 - sizeof(uint32_t)));
879 sx_xlock(&xprt->xp_lock);
880 ct = (struct ct_data *)xprt->xp_p2;
882 error = sosend(ct->ct_socket, NULL, NULL, mrep, NULL,
886 sx_xunlock(&xprt->xp_lock);
/*
 * Receive upcall registered with soupcall_set() by svc_vc_create() and
 * svc_vc_create_conn(); arg is the SVCXPRT that was passed at
 * registration time.
 */
907 svc_vc_soupcall(struct socket *so, void *arg, int waitflag)
909 SVCXPRT *xprt = (SVCXPRT *) arg;
917 * Get the effective UID of the sending process. Used by rpcbind, keyserv
918 * and rpc.yppasswdd on AF_LOCAL.
921 __rpc_get_local_uid(SVCXPRT *transp, uid_t *uid) {
927 sock = transp->xp_fd;
928 sa = (struct sockaddr *)transp->xp_rtaddr;
929 if (sa->sa_family == AF_LOCAL) {
930 ret = getpeereid(sock, &euid, &egid);