1 /* $NetBSD: svc_vc.c,v 1.7 2000/08/03 00:01:53 fvdl Exp $ */
4 * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
5 * unrestricted use provided that this legend is included on all tape
6 * media and as a part of the software program in whole or part. Users
7 * may copy or modify Sun RPC without charge, but are not authorized
8 * to license or distribute it to anyone else except as part of a product or
9 * program developed by the user.
11 * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
12 * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
13 * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
15 * Sun RPC is provided with no support and without any obligation on the
16 * part of Sun Microsystems, Inc. to assist in its use, correction,
17 * modification or enhancement.
19 * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
20 * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
21 * OR ANY PART THEREOF.
23 * In no event will Sun Microsystems, Inc. be liable for any lost revenue
24 * or profits or other special, indirect and consequential damages, even if
25 * Sun has been advised of the possibility of such damages.
27 * Sun Microsystems, Inc.
29 * Mountain View, California 94043
32 #if defined(LIBC_SCCS) && !defined(lint)
33 static char *sccsid2 = "@(#)svc_tcp.c 1.21 87/08/11 Copyr 1984 Sun Micro";
34 static char *sccsid = "@(#)svc_tcp.c 2.2 88/08/01 4.0 RPCSRC";
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
40 * svc_vc.c, Server side for Connection Oriented based RPC.
42 * Actually implements two flavors of transporter -
43 * a tcp rendezvouser (a listener and connection establisher)
44 * and a record/tcp stream.
47 #include <sys/param.h>
49 #include <sys/kernel.h>
50 #include <sys/malloc.h>
52 #include <sys/mutex.h>
54 #include <sys/protosw.h>
55 #include <sys/queue.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
59 #include <sys/systm.h>
64 #include <netinet/tcp.h>
69 #include <rpc/rpc_com.h>
71 #include <security/mac/mac_framework.h>
/*
 * Forward declarations for the file-local transport methods and helpers.
 * The methods fall into three groups that match the three xp_ops tables
 * in this file: rendezvous (listening socket), connected stream, and
 * backchannel.  svc_vc_create_conn(), svc_vc_accept() and
 * svc_vc_soupcall() are internal helpers shared by those methods.
 */
73 static bool_t svc_vc_rendezvous_recv(SVCXPRT *, struct rpc_msg *,
74 struct sockaddr **, struct mbuf **);
75 static enum xprt_stat svc_vc_rendezvous_stat(SVCXPRT *);
76 static void svc_vc_rendezvous_destroy(SVCXPRT *);
77 static bool_t svc_vc_null(void);
78 static void svc_vc_destroy(SVCXPRT *);
79 static enum xprt_stat svc_vc_stat(SVCXPRT *);
80 static bool_t svc_vc_recv(SVCXPRT *, struct rpc_msg *,
81 struct sockaddr **, struct mbuf **);
82 static bool_t svc_vc_reply(SVCXPRT *, struct rpc_msg *,
83 struct sockaddr *, struct mbuf *);
84 static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
85 static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
87 static void svc_vc_backchannel_destroy(SVCXPRT *);
88 static enum xprt_stat svc_vc_backchannel_stat(SVCXPRT *);
89 static bool_t svc_vc_backchannel_recv(SVCXPRT *, struct rpc_msg *,
90 struct sockaddr **, struct mbuf **);
91 static bool_t svc_vc_backchannel_reply(SVCXPRT *, struct rpc_msg *,
92 struct sockaddr *, struct mbuf *);
93 static bool_t svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq,
95 static SVCXPRT *svc_vc_create_conn(SVCPOOL *pool, struct socket *so,
96 struct sockaddr *raddr);
97 static int svc_vc_accept(struct socket *head, struct socket **sop);
98 static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag);
/*
 * Method table for the rendezvous (listening) transport; installed by
 * svc_vc_create().  The xp_reply slot is filled with svc_vc_null through
 * a function-pointer cast, so this transport has no reply routine of its
 * own.
 */
100 static struct xp_ops svc_vc_rendezvous_ops = {
101 .xp_recv = svc_vc_rendezvous_recv,
102 .xp_stat = svc_vc_rendezvous_stat,
103 .xp_reply = (bool_t (*)(SVCXPRT *, struct rpc_msg *,
104 struct sockaddr *, struct mbuf *))svc_vc_null,
105 .xp_destroy = svc_vc_rendezvous_destroy,
106 .xp_control = svc_vc_rendezvous_control
/*
 * Method table for a connected stream transport; installed on each new
 * transport by svc_vc_create_conn().
 */
109 static struct xp_ops svc_vc_ops = {
110 .xp_recv = svc_vc_recv,
111 .xp_stat = svc_vc_stat,
112 .xp_reply = svc_vc_reply,
113 .xp_destroy = svc_vc_destroy,
114 .xp_control = svc_vc_control
/*
 * Method table for the backchannel transport; installed by
 * svc_vc_create_backchannel().
 */
117 static struct xp_ops svc_vc_backchannel_ops = {
118 .xp_recv = svc_vc_backchannel_recv,
119 .xp_stat = svc_vc_backchannel_stat,
120 .xp_reply = svc_vc_backchannel_reply,
121 .xp_destroy = svc_vc_backchannel_destroy,
122 .xp_control = svc_vc_backchannel_control
127 * xprt = svc_vc_create(sock, send_buf_size, recv_buf_size);
129 * Creates, registers, and returns a (rpc) tcp based transporter.
130 * Once *xprt is initialized, it is registered as a transporter
131 * see (svc.h, xprt_register). This routine returns
132 * a NULL if a problem occurred.
134 * The filedescriptor passed in is expected to refer to a bound, but
135 * not yet connected socket.
137 * Since streams do buffered io similar to stdio, the caller can specify
138 * how big the send and receive buffers are via the second and third parms;
139 * 0 => use the system default.
/*
 * Visible flow: when the socket state already has SS_ISCONNECTED or
 * SS_ISDISCONNECTED set, the peer address is fetched and the work is
 * handed to svc_vc_create_conn().  The remaining statements build a
 * rendezvous transport: allocate the SVCXPRT, initialise xp_lock, attach
 * pool and socket, install svc_vc_rendezvous_ops, record the local
 * address, put the socket into the listening state, and register
 * svc_vc_soupcall as the SO_RCV upcall.
 * NOTE(review): the statements separating the two paths are not visible
 * in this listing - confirm against the full file.
 */
142 svc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize,
150 if (so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED)) {
152 error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
155 xprt = svc_vc_create_conn(pool, so, sa);
161 xprt = svc_xprt_alloc();
162 sx_init(&xprt->xp_lock, "xprt->xp_lock");
163 xprt->xp_pool = pool;
164 xprt->xp_socket = so;
167 xprt->xp_ops = &svc_vc_rendezvous_ops;
/* Local address of the listening socket is copied into xp_ltaddr. */
169 error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
171 goto cleanup_svc_vc_create;
174 memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
179 solisten(so, SOMAXCONN, curthread);
/*
 * The upcall is installed, and xp_upcallset raised, while the receive
 * sockbuf lock is held.
 */
181 SOCKBUF_LOCK(&so->so_rcv);
182 xprt->xp_upcallset = 1;
183 soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
184 SOCKBUF_UNLOCK(&so->so_rcv);
/* Error exit reached through the goto above. */
187 cleanup_svc_vc_create:
194 * Create a new transport for a socket obtained via soaccept().
/*
 * Builds a connection transport around an already connected socket.
 *
 * pool  - service pool the new transport is attached to (xp_pool).
 * so    - connected socket; stored in xp_socket.
 * raddr - peer address; raddr->sa_len bytes are copied into xp_rtaddr.
 *
 * Visible steps: set SO_KEEPALIVE on the socket, set TCP_NODELAY when the
 * protocol is IPPROTO_TCP, allocate the cf_conn state (initially
 * XPRT_IDLE) and the SVCXPRT, install svc_vc_ops, set the idle timeout,
 * record remote and local addresses, and register svc_vc_soupcall as the
 * SO_RCV upcall.
 * NOTE(review): error checks, the return statements and the wiring of cd
 * into the transport are on lines not visible in this listing.
 */
197 svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr)
199 SVCXPRT *xprt = NULL;
200 struct cf_conn *cd = NULL;
201 struct sockaddr* sa = NULL;
/* First socket option: SO_KEEPALIVE at SOL_SOCKET level. */
206 bzero(&opt, sizeof(struct sockopt));
207 opt.sopt_dir = SOPT_SET;
208 opt.sopt_level = SOL_SOCKET;
209 opt.sopt_name = SO_KEEPALIVE;
211 opt.sopt_valsize = sizeof(one);
212 error = sosetopt(so, &opt);
/* Second socket option, TCP only: TCP_NODELAY. */
217 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
218 bzero(&opt, sizeof(struct sockopt));
219 opt.sopt_dir = SOPT_SET;
220 opt.sopt_level = IPPROTO_TCP;
221 opt.sopt_name = TCP_NODELAY;
223 opt.sopt_valsize = sizeof(one);
224 error = sosetopt(so, &opt);
230 cd = mem_alloc(sizeof(*cd));
231 cd->strm_stat = XPRT_IDLE;
233 xprt = svc_xprt_alloc();
234 sx_init(&xprt->xp_lock, "xprt->xp_lock");
235 xprt->xp_pool = pool;
236 xprt->xp_socket = so;
239 xprt->xp_ops = &svc_vc_ops;
242 * See http://www.connectathon.org/talks96/nfstcp.pdf - client
243 * has a 5 minute timer, server has a 6 minute timer.
/* 6 * 60: the six minute server-side timer mentioned above, in seconds. */
245 xprt->xp_idletimeout = 6 * 60;
247 memcpy(&xprt->xp_rtaddr, raddr, raddr->sa_len);
249 error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
251 goto cleanup_svc_vc_create;
253 memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
258 SOCKBUF_LOCK(&so->so_rcv);
259 xprt->xp_upcallset = 1;
260 soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
261 SOCKBUF_UNLOCK(&so->so_rcv);
264 * Throw the transport into the active list in case it already
265 * has some data buffered.
267 sx_xlock(&xprt->xp_lock);
269 sx_xunlock(&xprt->xp_lock);
/*
 * Error exit: releases the transport and the cf_conn.
 * NOTE(review): presumably each free is guarded by a NULL check on a line
 * not shown here - confirm.
 */
272 cleanup_svc_vc_create:
274 mem_free(xprt, sizeof(*xprt));
277 mem_free(cd, sizeof(*cd));
282 * Create a new transport for a backchannel on a clnt_vc socket.
/*
 * Allocates a cf_conn (initially XPRT_IDLE) and an SVCXPRT attached to
 * pool.  The transport has no socket of its own (xp_socket is NULL) and
 * uses svc_vc_backchannel_ops.
 */
285 svc_vc_create_backchannel(SVCPOOL *pool)
287 SVCXPRT *xprt = NULL;
288 struct cf_conn *cd = NULL;
290 cd = mem_alloc(sizeof(*cd));
291 cd->strm_stat = XPRT_IDLE;
293 xprt = svc_xprt_alloc();
294 sx_init(&xprt->xp_lock, "xprt->xp_lock");
295 xprt->xp_pool = pool;
296 xprt->xp_socket = NULL;
299 xprt->xp_ops = &svc_vc_backchannel_ops;
304 * This does all of the accept except the final call to soaccept. The
305 * caller will call soaccept after dropping its locks (soaccept may
/*
 * head - listening socket.
 * sop  - NOTE(review): presumably receives the dequeued socket; the
 *        assignment is not visible in this listing.
 *
 * Visible checks, in order: head must have SO_ACCEPTCONN set, the MAC
 * accept check is run with the credentials of curthread, and the
 * completed-connection queue (so_comp) must be non-empty.  The first
 * queued socket is then referenced, unlinked from so_comp, given the
 * SS_NBIO bit of head, and has SQ_COMP cleared.
 */
309 svc_vc_accept(struct socket *head, struct socket **sop)
314 if ((head->so_options & SO_ACCEPTCONN) == 0) {
319 error = mac_socket_check_accept(curthread->td_ucred, head);
324 if (TAILQ_EMPTY(&head->so_comp)) {
329 so = TAILQ_FIRST(&head->so_comp);
330 KASSERT(!(so->so_qstate & SQ_INCOMP), ("svc_vc_accept: so SQ_INCOMP"));
331 KASSERT(so->so_qstate & SQ_COMP, ("svc_vc_accept: so not SQ_COMP"));
334 * Before changing the flags on the socket, we have to bump the
335 * reference count. Otherwise, if the protocol calls sofree(),
336 * the socket will be released due to a zero refcount.
337 * XXX might not need soref() since this is simpler than kern_accept.
339 SOCK_LOCK(so); /* soref() and so_state update */
340 soref(so); /* file descriptor reference */
342 TAILQ_REMOVE(&head->so_comp, so, so_list);
/* Only the SS_NBIO bit is copied from the listening socket. */
344 so->so_state |= (head->so_state & SS_NBIO);
345 so->so_qstate &= ~SQ_COMP;
353 /* connection has been removed from the listen queue */
354 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
/*
 * Receive method of the rendezvous transport.  It never yields an RPC
 * message (the visible return is FALSE); its job is to accept a pending
 * connection and wrap it in a new connection transport.
 *
 * Visible sequence: xp_lock is taken around svc_vc_accept().  On
 * EWOULDBLOCK the completed queue is re-tested under the pool lock and
 * the transport is marked inactive if it is still empty.  A second
 * visible path clears the SO_RCV upcall under the receive sockbuf lock.
 * Otherwise xp_lock is dropped before soaccept() is called, and the
 * accepted socket is passed to svc_vc_create_conn(); the reference on the
 * new transport is then dropped with SVC_RELEASE().
 * NOTE(review): the conditions selecting between these paths are on lines
 * not visible in this listing.
 */
361 svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg,
362 struct sockaddr **addrp, struct mbuf **mp)
364 struct socket *so = NULL;
365 struct sockaddr *sa = NULL;
370 * The socket upcall calls xprt_active() which will eventually
371 * cause the server to call us here. We attempt to accept a
372 * connection from the socket and turn it into a new
373 * transport. If the accept fails, we have drained all pending
374 * connections so we call xprt_inactive().
376 sx_xlock(&xprt->xp_lock);
378 error = svc_vc_accept(xprt->xp_socket, &so);
380 if (error == EWOULDBLOCK) {
382 * We must re-test for new connections after taking
383 * the lock to protect us in the case where a new
384 * connection arrives after our call to accept fails
385 * with EWOULDBLOCK. The pool lock protects us from
386 * racing the upcall after our TAILQ_EMPTY() call
390 mtx_lock(&xprt->xp_pool->sp_lock);
391 if (TAILQ_EMPTY(&xprt->xp_socket->so_comp))
392 xprt_inactive_locked(xprt);
393 mtx_unlock(&xprt->xp_pool->sp_lock);
395 sx_xunlock(&xprt->xp_lock);
400 SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
401 if (xprt->xp_upcallset) {
402 xprt->xp_upcallset = 0;
403 soupcall_clear(xprt->xp_socket, SO_RCV);
405 SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
407 sx_xunlock(&xprt->xp_lock);
411 sx_xunlock(&xprt->xp_lock);
414 error = soaccept(so, &sa);
418 * XXX not sure if I need to call sofree or soclose here.
426 * svc_vc_create_conn will call xprt_register - we don't need
427 * to do anything with the new connection except derefence it.
429 new_xprt = svc_vc_create_conn(xprt->xp_pool, so, sa);
433 SVC_RELEASE(new_xprt);
438 return (FALSE); /* there is never an rpc msg to be processed */
442 static enum xprt_stat
443 svc_vc_rendezvous_stat(SVCXPRT *xprt)
/*
 * Teardown shared by svc_vc_rendezvous_destroy() and svc_vc_destroy():
 * clears the SO_RCV upcall if xp_upcallset is raised (under the receive
 * sockbuf lock), destroys xp_lock, closes the socket, and frees the
 * xp_netid string.
 * NOTE(review): the xp_netid free is presumably guarded by a NULL check on
 * a line not visible in this listing - confirm.
 */
450 svc_vc_destroy_common(SVCXPRT *xprt)
452 SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
453 if (xprt->xp_upcallset) {
454 xprt->xp_upcallset = 0;
455 soupcall_clear(xprt->xp_socket, SO_RCV);
457 SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
459 sx_destroy(&xprt->xp_lock);
/* Return value of soclose() is deliberately discarded. */
461 (void)soclose(xprt->xp_socket);
464 (void) mem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1);
/*
 * Destroy method of the rendezvous transport; the visible work is
 * delegated to svc_vc_destroy_common().
 */
469 svc_vc_rendezvous_destroy(SVCXPRT *xprt)
472 svc_vc_destroy_common(xprt);
/*
 * Destroy method of a connection transport: runs the common teardown,
 * then frees the pending mbuf chain (cd->mpending) and the cf_conn state
 * kept in xp_p1.
 * NOTE(review): other frees or guards may sit on lines not visible in
 * this listing.
 */
476 svc_vc_destroy(SVCXPRT *xprt)
478 struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
480 svc_vc_destroy_common(xprt);
485 m_freem(cd->mpending);
486 mem_free(cd, sizeof(*cd));
/*
 * Destroy method of the backchannel transport; the visible work frees the
 * cf_conn state kept in xp_p1.
 * NOTE(review): the statements between the declaration and the free are
 * not visible in this listing.
 */
490 svc_vc_backchannel_destroy(SVCXPRT *xprt)
492 struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
502 mem_free(cd, sizeof(*cd));
507 svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in)
513 svc_vc_rendezvous_control(SVCXPRT *xprt, const u_int rq, void *in)
520 svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq, void *in)
/*
 * Status method of a connection transport.  The stream state is tested
 * against XPRT_DIED first.  XPRT_MOREREQS is returned from three visible
 * places: an early hint check, after counting at least sizeof(uint32_t)
 * buffered bytes under xp_lock (enough for a record marker), and when the
 * socket is readable.
 * NOTE(review): the return for the XPRT_DIED case, the loop body and the
 * final fall-through return are not visible in this listing.
 */
526 static enum xprt_stat
527 svc_vc_stat(SVCXPRT *xprt)
533 cd = (struct cf_conn *)(xprt->xp_p1);
535 if (cd->strm_stat == XPRT_DIED)
539 * Return XPRT_MOREREQS if we have buffered data and we are
540 * mid-record or if we have enough data for a record
541 * marker. Since this is only a hint, we read mpending and
542 * resid outside the lock. We do need to take the lock if we
543 * have to traverse the mbuf chain.
547 return (XPRT_MOREREQS);
549 sx_xlock(&xprt->xp_lock);
551 while (m && n < sizeof(uint32_t)) {
555 sx_xunlock(&xprt->xp_lock);
556 if (n >= sizeof(uint32_t))
557 return (XPRT_MOREREQS);
560 if (soreadable(xprt->xp_socket))
561 return (XPRT_MOREREQS);
/*
 * Status method of the backchannel transport: reports XPRT_MOREREQS while
 * a request is queued in cd->mreq.
 * NOTE(review): the return for the empty case is not visible here.
 */
566 static enum xprt_stat
567 svc_vc_backchannel_stat(SVCXPRT *xprt)
571 cd = (struct cf_conn *)(xprt->xp_p1);
573 if (cd->mreq != NULL)
574 return (XPRT_MOREREQS);
/*
 * Receive method of a connection transport.
 *
 * Runs under xp_lock.  Buffered stream data lives in cd->mpending and the
 * record being assembled in cd->mreq.  A 4-byte record marker is read in
 * network byte order: bit 0x80000000 is stored in cd->eor and the low 31
 * bits in cd->resid (bytes still expected for the current fragment).
 * Data is moved from mpending to mreq, using m_split() where the chain
 * must be cut, until resid reaches zero.  A complete record is decoded
 * with xdr_callmsg() after xp_lock has been released, and the remaining
 * mbufs are handed back through *mp.
 *
 * When more data is needed, soreceive() is called with MSG_DONTWAIT.  On
 * EWOULDBLOCK the socket is re-tested under the pool lock and the
 * transport marked inactive if it is still not readable.  Two other
 * visible paths set cd->strm_stat to XPRT_DIED: one that first clears the
 * SO_RCV upcall, and the EOF path.  Newly received data is appended to
 * the tail of cd->mpending.
 * NOTE(review): loop structure, branch conditions and return values are
 * on lines not visible in this listing.
 */
580 svc_vc_recv(SVCXPRT *xprt, struct rpc_msg *msg,
581 struct sockaddr **addrp, struct mbuf **mp)
583 struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
590 * Serialise access to the socket and our own record parsing
593 sx_xlock(&xprt->xp_lock);
597 * If we have an mbuf chain in cd->mpending, try to parse a
598 * record from it, leaving the result in cd->mreq. If we don't
599 * have a complete record, leave the partial result in
600 * cd->mreq and try to read more from the socket.
604 * If cd->resid is non-zero, we have part of the
605 * record already, otherwise we are expecting a record
610 * See if there is enough data buffered to
611 * make up a record marker. Make sure we can
612 * handle the case where the record marker is
613 * split across more than one mbuf.
619 while (n < sizeof(uint32_t) && m) {
623 if (n < sizeof(uint32_t))
625 m_copydata(cd->mpending, 0, sizeof(header),
627 header = ntohl(header);
628 cd->eor = (header & 0x80000000) != 0;
629 cd->resid = header & 0x7fffffff;
630 m_adj(cd->mpending, sizeof(uint32_t));
634 * Start pulling off mbufs from cd->mpending
635 * until we either have a complete record or
636 * we run out of data. We use m_split to pull
637 * data - it will pull as much as possible and
638 * split the last mbuf if necessary.
640 while (cd->mpending && cd->resid) {
642 if (cd->mpending->m_next
643 || cd->mpending->m_len > cd->resid)
644 cd->mpending = m_split(cd->mpending,
645 cd->resid, M_WAITOK);
649 m_last(cd->mreq)->m_next = m;
653 cd->resid -= m->m_len;
659 * If cd->resid is zero now, we have managed to
660 * receive a record fragment from the stream. Check
661 * for the end-of-record mark to see if we need more.
663 if (cd->resid == 0) {
668 * Success - we have a complete record in
671 xdrmbuf_create(&xdrs, cd->mreq, XDR_DECODE);
673 sx_xunlock(&xprt->xp_lock);
675 if (! xdr_callmsg(&xdrs, msg)) {
681 *mp = xdrmbuf_getall(&xdrs);
690 * The socket upcall calls xprt_active() which will eventually
691 * cause the server to call us here. We attempt to
692 * read as much as possible from the socket and put
693 * the result in cd->mpending. If the read fails,
694 * we have drained both cd->mpending and the socket so
695 * we can call xprt_inactive().
697 uio.uio_resid = 1000000000;
698 uio.uio_td = curthread;
700 rcvflag = MSG_DONTWAIT;
701 error = soreceive(xprt->xp_socket, NULL, &uio, &m, NULL,
704 if (error == EWOULDBLOCK) {
706 * We must re-test for readability after
707 * taking the lock to protect us in the case
708 * where a new packet arrives on the socket
709 * after our call to soreceive fails with
710 * EWOULDBLOCK. The pool lock protects us from
711 * racing the upcall after our soreadable()
712 * call returns false.
714 mtx_lock(&xprt->xp_pool->sp_lock);
715 if (!soreadable(xprt->xp_socket))
716 xprt_inactive_locked(xprt);
717 mtx_unlock(&xprt->xp_pool->sp_lock);
718 sx_xunlock(&xprt->xp_lock);
723 SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
724 if (xprt->xp_upcallset) {
725 xprt->xp_upcallset = 0;
726 soupcall_clear(xprt->xp_socket, SO_RCV);
728 SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
730 cd->strm_stat = XPRT_DIED;
731 sx_xunlock(&xprt->xp_lock);
737 * EOF - the other end has closed the socket.
740 cd->strm_stat = XPRT_DIED;
741 sx_xunlock(&xprt->xp_lock);
746 m_last(cd->mpending)->m_next = m;
/*
 * Receive method of the backchannel transport.
 *
 * Takes xp_lock and reads the ct_data pointer from xp_p2; one visible
 * path releases xp_lock straight away.  Otherwise ct_lock is taken and
 * cd->mreq is advanced to m->m_nextpkt, which removes one queued request.
 * Both locks are released before that request is decoded with
 * xdr_callmsg(), and the remaining mbufs are handed back through *mp.
 * NOTE(review): presumably m is the head of cd->mreq and the early exits
 * cover a missing ct_data or an empty queue - those lines are not visible
 * in this listing.
 */
753 svc_vc_backchannel_recv(SVCXPRT *xprt, struct rpc_msg *msg,
754 struct sockaddr **addrp, struct mbuf **mp)
756 struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
761 sx_xlock(&xprt->xp_lock);
762 ct = (struct ct_data *)xprt->xp_p2;
764 sx_xunlock(&xprt->xp_lock);
767 mtx_lock(&ct->ct_lock);
771 mtx_unlock(&ct->ct_lock);
772 sx_xunlock(&xprt->xp_lock);
775 cd->mreq = m->m_nextpkt;
776 mtx_unlock(&ct->ct_lock);
777 sx_xunlock(&xprt->xp_lock);
779 xdrmbuf_create(&xdrs, m, XDR_DECODE);
780 if (! xdr_callmsg(&xdrs, msg)) {
785 *mp = xdrmbuf_getall(&xdrs);
/*
 * Reply method of a connection transport.
 *
 * msg - reply header to encode.
 * m   - caller-supplied mbuf chain; appended after the encoded header
 *       when the reply is MSG_ACCEPTED with status SUCCESS.
 *
 * A fresh header mbuf is allocated with sizeof(uint32_t) bytes skipped at
 * the front, the reply is XDR-encoded into it, and a record marker is
 * then prepended: 0x80000000 ORed with the packet length minus the marker
 * itself, in network byte order.  The chain is sent on xp_socket with
 * sosend().
 * NOTE(review): failure handling and the return value are on lines not
 * visible in this listing.
 */
791 svc_vc_reply(SVCXPRT *xprt, struct rpc_msg *msg,
792 struct sockaddr *addr, struct mbuf *m)
800 * Leave space for record mark.
802 mrep = m_gethdr(M_WAITOK, MT_DATA);
803 mrep->m_data += sizeof(uint32_t);
805 xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
807 if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
808 msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
809 if (!xdr_replymsg(&xdrs, msg))
812 xdrmbuf_append(&xdrs, m);
814 stat = xdr_replymsg(&xdrs, msg);
821 * Prepend a record marker containing the reply length.
823 M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
824 *mtod(mrep, uint32_t *) =
825 htonl(0x80000000 | (mrep->m_pkthdr.len
826 - sizeof(uint32_t)));
827 error = sosend(xprt->xp_socket, NULL, NULL, mrep, NULL,
/*
 * Reply method of the backchannel transport.
 *
 * Encoding follows the same visible steps as svc_vc_reply(): a header
 * mbuf with room reserved for the record mark, XDR encoding of msg with m
 * appended for an accepted, successful reply, and a prepended marker of
 * 0x80000000 ORed with the payload length.  The send differs: xp_lock is
 * taken, the ct_data pointer is read from xp_p2, and the chain is sent on
 * ct->ct_socket before xp_lock is released.
 * NOTE(review): handling of a missing ct_data, failure paths and the
 * return value are on lines not visible in this listing.
 */
843 svc_vc_backchannel_reply(SVCXPRT *xprt, struct rpc_msg *msg,
844 struct sockaddr *addr, struct mbuf *m)
853 * Leave space for record mark.
855 mrep = m_gethdr(M_WAITOK, MT_DATA);
856 mrep->m_data += sizeof(uint32_t);
858 xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
860 if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
861 msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
862 if (!xdr_replymsg(&xdrs, msg))
865 xdrmbuf_append(&xdrs, m);
867 stat = xdr_replymsg(&xdrs, msg);
874 * Prepend a record marker containing the reply length.
876 M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
877 *mtod(mrep, uint32_t *) =
878 htonl(0x80000000 | (mrep->m_pkthdr.len
879 - sizeof(uint32_t)));
880 sx_xlock(&xprt->xp_lock);
881 ct = (struct ct_data *)xprt->xp_p2;
883 error = sosend(ct->ct_socket, NULL, NULL, mrep, NULL,
887 sx_xunlock(&xprt->xp_lock);
/*
 * Socket upcall registered for SO_RCV through soupcall_set() by the
 * transport constructors in this file.  arg is the SVCXPRT that was
 * supplied at registration time.
 * NOTE(review): comments elsewhere in this file state that the upcall
 * calls xprt_active(); the body is not visible in this listing.
 */
908 svc_vc_soupcall(struct socket *so, void *arg, int waitflag)
910 SVCXPRT *xprt = (SVCXPRT *) arg;
918 * Get the effective UID of the sending process. Used by rpcbind, keyserv
919 * and rpc.yppasswdd on AF_LOCAL.
922 __rpc_get_local_uid(SVCXPRT *transp, uid_t *uid) {
928 sock = transp->xp_fd;
929 sa = (struct sockaddr *)transp->xp_rtaddr;
930 if (sa->sa_family == AF_LOCAL) {
931 ret = getpeereid(sock, &euid, &egid);