2 * util/netevent.c - event notification
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
6 * This software is open source.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 * This file contains event notification functions.
42 #include "util/netevent.h"
43 #include "util/ub_event.h"
45 #include "util/net_help.h"
46 #include "util/fptr_wlist.h"
47 #include "sldns/pkthdr.h"
48 #include "sldns/sbuffer.h"
49 #include "dnstap/dnstap.h"
50 #ifdef HAVE_OPENSSL_SSL_H
51 #include <openssl/ssl.h>
53 #ifdef HAVE_OPENSSL_ERR_H
54 #include <openssl/err.h>
57 /* -------- Start of local definitions -------- */
/* NOTE(review): this region is a decimated excerpt - the embedded
 * original line numbers are discontiguous, so the #ifndef/#else/#endif
 * lines that frame these portability workaround macros are not all
 * visible here.  Code kept byte-identical; comments only. */
58 /** if CMSG_ALIGN is not defined on this platform, a workaround */
61 # define CMSG_ALIGN(n) __CMSG_ALIGN(n)
62 # elif defined(CMSG_DATA_ALIGN)
63 # define CMSG_ALIGN _CMSG_DATA_ALIGN
65 # define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
69 /** if CMSG_LEN is not defined on this platform, a workaround */
71 # define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
74 /** if CMSG_SPACE is not defined on this platform, a workaround */
76 # ifdef _CMSG_HDR_ALIGN
77 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
79 # define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
83 /** The TCP reading or writing query timeout in milliseconds */
84 #define TCP_QUERY_TIMEOUT 120000
85 /** The TCP timeout in msec for fast queries, above half are used */
86 #define TCP_QUERY_TIMEOUT_FAST 200
88 #ifndef NONBLOCKING_IS_BROKEN
89 /** number of UDP reads to perform per read indication from select */
90 #define NUM_UDP_PER_SELECT 100
92 #define NUM_UDP_PER_SELECT 1
96 * The internal event structure for keeping ub_event info for the event.
97 * Possibly other structures (list, tree) this is part of.
/* NOTE(review): decimated excerpt - several struct members, closing
 * braces and doc-comment openers are missing from view (the embedded
 * line numbers jump).  Code kept byte-identical; comments only. */
99 struct internal_event {
101 struct comm_base* base;
102 /** ub_event event type */
107 * Internal base structure, so that every thread has its own events.
109 struct internal_base {
110 /** ub_event event_base type. */
111 struct ub_event_base* base;
112 /** seconds time pointer points here */
114 /** timeval with current time */
116 /** the event used for slow_accept timeouts */
117 struct ub_event* slow_accept;
118 /** true if slow_accept is enabled */
119 int slow_accept_enabled;
123 * Internal timer structure, to store timer event in.
125 struct internal_timer {
126 /** the super struct from which derived */
127 struct comm_timer super;
129 struct comm_base* base;
130 /** ub_event event type */
132 /** is timer enabled */
137 * Internal signal structure, to store signal event in.
139 struct internal_signal {
140 /** ub_event event type */
142 /** next in signal list */
143 struct internal_signal* next;
146 /** create a tcp handler with a parent */
147 static struct comm_point* comm_point_create_tcp_handler(
148 struct comm_base *base, struct comm_point* parent, size_t bufsize,
149 comm_point_callback_type* callback, void* callback_arg);
151 /* -------- End of local definitions -------- */
154 comm_base_create(int sigs)
156 struct comm_base* b = (struct comm_base*)calloc(1,
157 sizeof(struct comm_base));
158 const char *evnm="event", *evsys="", *evmethod="";
162 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
167 b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now);
174 ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod);
175 verbose(VERB_ALGO, "%s %s user %s method.", evnm, evsys, evmethod);
180 comm_base_create_event(struct ub_event_base* base)
182 struct comm_base* b = (struct comm_base*)calloc(1,
183 sizeof(struct comm_base));
186 b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
197 comm_base_delete(struct comm_base* b)
201 if(b->eb->slow_accept_enabled) {
202 if(ub_event_del(b->eb->slow_accept) != 0) {
203 log_err("could not event_del slow_accept");
205 ub_event_free(b->eb->slow_accept);
207 ub_event_base_free(b->eb->base);
214 comm_base_delete_no_base(struct comm_base* b)
218 if(b->eb->slow_accept_enabled) {
219 if(ub_event_del(b->eb->slow_accept) != 0) {
220 log_err("could not event_del slow_accept");
222 ub_event_free(b->eb->slow_accept);
230 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
237 comm_base_dispatch(struct comm_base* b)
240 retval = ub_event_base_dispatch(b->eb->base);
242 fatal_exit("event_dispatch returned error %d, "
243 "errno is %s", retval, strerror(errno));
247 void comm_base_exit(struct comm_base* b)
249 if(ub_event_base_loopexit(b->eb->base) != 0) {
250 log_err("Could not loopexit");
254 void comm_base_set_slow_accept_handlers(struct comm_base* b,
255 void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
257 b->stop_accept = stop_acc;
258 b->start_accept = start_acc;
262 struct ub_event_base* comm_base_internal(struct comm_base* b)
267 /** see if errno for udp has to be logged or not uses globals */
/* NOTE(review): decimated excerpt - the embedded original line numbers
 * jump, so the opening brace, return statements and matching #endif
 * lines are not visible here.  Code kept byte-identical; comments only.
 * Visible intent: squelch transient/disconnect sendto() errors below
 * the verbosity thresholds tested in each condition. */
269 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
271 /* do not log transient errors (unless high verbosity) */
272 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
286 if(verbosity < VERB_ALGO)
292 /* permission denied is gotten for every send if the
293 * network is disconnected (on some OS), squelch it */
294 if( ((errno == EPERM)
295 # ifdef EADDRNOTAVAIL
296 /* 'Cannot assign requested address' also when disconnected */
297 || (errno == EADDRNOTAVAIL)
299 ) && verbosity < VERB_DETAIL)
301 /* squelch errors where people deploy AAAA ::ffff:bla for
302 * authority servers, which we try for intranets. */
303 if(errno == EINVAL && addr_is_ip4mapped(
304 (struct sockaddr_storage*)addr, addrlen) &&
305 verbosity < VERB_DETAIL)
307 /* SO_BROADCAST sockopt can give access to 255.255.255.255,
308 * but a dns cache does not need it. */
309 if(errno == EACCES && addr_is_broadcast(
310 (struct sockaddr_storage*)addr, addrlen) &&
311 verbosity < VERB_DETAIL)
316 int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
318 return udp_send_errno_needs_log(addr, addrlen);
321 /* send a UDP reply */
/* NOTE(review): decimated excerpt - opening brace, #ifdef USE_WINSOCK
 * markers and return statements are missing from view.  Code kept
 * byte-identical; comments only.
 * Visible behaviour: sendto() the buffer; on EAGAIN/EWOULDBLOCK (or the
 * winsock equivalents) block briefly and retry once, restoring
 * nonblocking mode afterwards; otherwise log per
 * udp_send_errno_needs_log(). */
323 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
324 struct sockaddr* addr, socklen_t addrlen)
327 log_assert(c->fd != -1);
329 if(sldns_buffer_remaining(packet) == 0)
330 log_err("error: send empty UDP packet");
332 log_assert(addr && addrlen > 0);
333 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
334 sldns_buffer_remaining(packet), 0,
337 /* try again and block, waiting for IO to complete,
338 * we want to send the answer, and we will wait for
339 * the ethernet interface buffer to have space. */
341 if(errno == EAGAIN ||
343 errno == EWOULDBLOCK ||
347 if(WSAGetLastError() == WSAEINPROGRESS ||
348 WSAGetLastError() == WSAENOBUFS ||
349 WSAGetLastError() == WSAEWOULDBLOCK) {
353 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
354 sldns_buffer_remaining(packet), 0,
357 fd_set_nonblock(c->fd);
362 if(!udp_send_errno_needs_log(addr, addrlen))
365 verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
367 verbose(VERB_OPS, "sendto failed: %s",
368 wsa_strerror(WSAGetLastError()));
370 log_addr(VERB_OPS, "remote address is",
371 (struct sockaddr_storage*)addr, addrlen);
373 } else if((size_t)sent != sldns_buffer_remaining(packet)) {
374 log_err("sent %d in place of %d bytes",
375 (int)sent, (int)sldns_buffer_remaining(packet));
381 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
382 /** print debug ancillary info */
/* NOTE(review): decimated excerpt - braces, a v6 buffer declaration and
 * some #ifdef partners are missing from view.  Code kept byte-identical;
 * comments only.  Visible intent: log the pktinfo (source interface /
 * destination address) carried in a comm_reply, for srctype 4 or 6. */
383 static void p_ancil(const char* str, struct comm_reply* r)
385 if(r->srctype != 4 && r->srctype != 6) {
386 log_info("%s: unknown srctype %d", str, r->srctype);
389 if(r->srctype == 6) {
391 if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
392 buf, (socklen_t)sizeof(buf)) == 0) {
393 (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf));
395 buf[sizeof(buf)-1]=0;
396 log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex);
397 } else if(r->srctype == 4) {
399 char buf1[1024], buf2[1024];
400 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
401 buf1, (socklen_t)sizeof(buf1)) == 0) {
402 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
404 buf1[sizeof(buf1)-1]=0;
405 #ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
406 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
407 buf2, (socklen_t)sizeof(buf2)) == 0) {
408 (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2));
410 buf2[sizeof(buf2)-1]=0;
414 log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
416 #elif defined(IP_RECVDSTADDR)
418 if(inet_ntop(AF_INET, &r->pktinfo.v4addr,
419 buf1, (socklen_t)sizeof(buf1)) == 0) {
420 (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
422 buf1[sizeof(buf1)-1]=0;
423 log_info("%s: %s", str, buf1);
424 #endif /* IP_PKTINFO or PI_RECVDSTDADDR */
427 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
429 /** send a UDP reply over specified interface*/
/* NOTE(review): decimated excerpt - msghdr/iovec declarations, braces,
 * several #ifdef partners and the return statements are missing from
 * view.  Code kept byte-identical; comments only.
 * Visible intent: like comm_point_send_udp_msg() but with sendmsg() and
 * a control-message (IP_PKTINFO / IP_SENDSRCADDR / IPV6_PKTINFO) so the
 * reply leaves from the same interface/address the query arrived on. */
431 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
432 struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r)
434 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
440 struct cmsghdr *cmsg;
441 #endif /* S_SPLINT_S */
443 log_assert(c->fd != -1);
445 if(sldns_buffer_remaining(packet) == 0)
446 log_err("error: send empty UDP packet");
448 log_assert(addr && addrlen > 0);
451 msg.msg_namelen = addrlen;
452 iov[0].iov_base = sldns_buffer_begin(packet);
453 iov[0].iov_len = sldns_buffer_remaining(packet);
456 msg.msg_control = control;
458 msg.msg_controllen = sizeof(control);
459 #endif /* S_SPLINT_S */
463 cmsg = CMSG_FIRSTHDR(&msg);
464 if(r->srctype == 4) {
467 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
468 log_assert(msg.msg_controllen <= sizeof(control));
469 cmsg->cmsg_level = IPPROTO_IP;
470 cmsg->cmsg_type = IP_PKTINFO;
471 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
472 sizeof(struct in_pktinfo));
473 /* unset the ifindex to not bypass the routing tables */
474 cmsg_data = CMSG_DATA(cmsg);
475 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
476 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
477 #elif defined(IP_SENDSRCADDR)
478 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
479 log_assert(msg.msg_controllen <= sizeof(control));
480 cmsg->cmsg_level = IPPROTO_IP;
481 cmsg->cmsg_type = IP_SENDSRCADDR;
482 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
483 sizeof(struct in_addr));
484 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
486 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
487 msg.msg_control = NULL;
488 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
489 } else if(r->srctype == 6) {
491 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
492 log_assert(msg.msg_controllen <= sizeof(control));
493 cmsg->cmsg_level = IPPROTO_IPV6;
494 cmsg->cmsg_type = IPV6_PKTINFO;
495 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
496 sizeof(struct in6_pktinfo));
497 /* unset the ifindex to not bypass the routing tables */
498 cmsg_data = CMSG_DATA(cmsg);
499 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
500 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
502 /* try to pass all 0 to use default route */
503 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
504 log_assert(msg.msg_controllen <= sizeof(control));
505 cmsg->cmsg_level = IPPROTO_IPV6;
506 cmsg->cmsg_type = IPV6_PKTINFO;
507 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
508 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
510 #endif /* S_SPLINT_S */
511 if(verbosity >= VERB_ALGO)
512 p_ancil("send_udp over interface", r);
513 sent = sendmsg(c->fd, &msg, 0);
515 /* try again and block, waiting for IO to complete,
516 * we want to send the answer, and we will wait for
517 * the ethernet interface buffer to have space. */
519 if(errno == EAGAIN ||
521 errno == EWOULDBLOCK ||
525 if(WSAGetLastError() == WSAEINPROGRESS ||
526 WSAGetLastError() == WSAENOBUFS ||
527 WSAGetLastError() == WSAEWOULDBLOCK) {
531 sent = sendmsg(c->fd, &msg, 0);
533 fd_set_nonblock(c->fd);
538 if(!udp_send_errno_needs_log(addr, addrlen))
540 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
541 log_addr(VERB_OPS, "remote address is",
542 (struct sockaddr_storage*)addr, addrlen);
544 /* netbsd 7 has IP_PKTINFO for recv but not send */
545 if(errno == EINVAL && r->srctype == 4)
546 log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
547 "Please disable interface-automatic");
550 } else if((size_t)sent != sldns_buffer_remaining(packet)) {
551 log_err("sent %d in place of %d bytes",
552 (int)sent, (int)sldns_buffer_remaining(packet));
562 log_err("sendmsg: IPV6_PKTINFO not supported");
564 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
/* NOTE(review): decimated excerpt - local declarations (i, rcv, msg,
 * iov, ancil buffer), braces and loop-control lines are missing from
 * view.  Code kept byte-identical; comments only.
 * Visible intent: UDP read event handler with ancillary data: recvmsg()
 * up to NUM_UDP_PER_SELECT datagrams, record pktinfo (destination
 * interface/address), call the comm_point callback, and when it returns
 * nonzero send the reply back over the same interface. */
568 comm_point_udp_ancil_callback(int fd, short event, void* arg)
570 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
571 struct comm_reply rep;
578 struct cmsghdr* cmsg;
579 #endif /* S_SPLINT_S */
581 rep.c = (struct comm_point*)arg;
582 log_assert(rep.c->type == comm_udp);
584 if(!(event&UB_EV_READ))
586 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
587 ub_comm_base_now(rep.c->ev->base);
588 for(i=0; i<NUM_UDP_PER_SELECT; i++) {
589 sldns_buffer_clear(rep.c->buffer);
590 rep.addrlen = (socklen_t)sizeof(rep.addr);
591 log_assert(fd != -1);
592 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
593 msg.msg_name = &rep.addr;
594 msg.msg_namelen = (socklen_t)sizeof(rep.addr);
595 iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
596 iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
599 msg.msg_control = ancil;
601 msg.msg_controllen = sizeof(ancil);
602 #endif /* S_SPLINT_S */
604 rcv = recvmsg(fd, &msg, 0);
606 if(errno != EAGAIN && errno != EINTR) {
607 log_err("recvmsg failed: %s", strerror(errno));
611 rep.addrlen = msg.msg_namelen;
612 sldns_buffer_skip(rep.c->buffer, rcv);
613 sldns_buffer_flip(rep.c->buffer);
616 for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
617 cmsg = CMSG_NXTHDR(&msg, cmsg)) {
618 if( cmsg->cmsg_level == IPPROTO_IPV6 &&
619 cmsg->cmsg_type == IPV6_PKTINFO) {
621 memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
622 sizeof(struct in6_pktinfo));
625 } else if( cmsg->cmsg_level == IPPROTO_IP &&
626 cmsg->cmsg_type == IP_PKTINFO) {
628 memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
629 sizeof(struct in_pktinfo));
631 #elif defined(IP_RECVDSTADDR)
632 } else if( cmsg->cmsg_level == IPPROTO_IP &&
633 cmsg->cmsg_type == IP_RECVDSTADDR) {
635 memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
636 sizeof(struct in_addr));
638 #endif /* IP_PKTINFO or IP_RECVDSTADDR */
641 if(verbosity >= VERB_ALGO)
642 p_ancil("receive_udp on interface", &rep);
643 #endif /* S_SPLINT_S */
644 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
645 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
646 /* send back immediate reply */
647 (void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
648 (struct sockaddr*)&rep.addr, rep.addrlen, &rep);
650 if(rep.c->fd == -1) /* commpoint closed */
657 fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. "
658 "Please disable interface-automatic");
659 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
/* NOTE(review): decimated excerpt - local declarations (i, rcv), braces
 * and continue/break lines are missing from view.  Code kept
 * byte-identical; comments only.
 * Visible intent: plain UDP read event handler: recvfrom() up to
 * NUM_UDP_PER_SELECT datagrams, call the comm_point callback, and when
 * it returns nonzero send the reply immediately. */
663 comm_point_udp_callback(int fd, short event, void* arg)
665 struct comm_reply rep;
669 rep.c = (struct comm_point*)arg;
670 log_assert(rep.c->type == comm_udp);
672 if(!(event&UB_EV_READ))
674 log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
675 ub_comm_base_now(rep.c->ev->base);
676 for(i=0; i<NUM_UDP_PER_SELECT; i++) {
677 sldns_buffer_clear(rep.c->buffer);
678 rep.addrlen = (socklen_t)sizeof(rep.addr);
679 log_assert(fd != -1);
680 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
681 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer),
682 sldns_buffer_remaining(rep.c->buffer), 0,
683 (struct sockaddr*)&rep.addr, &rep.addrlen);
686 if(errno != EAGAIN && errno != EINTR)
687 log_err("recvfrom %d failed: %s",
688 fd, strerror(errno));
690 if(WSAGetLastError() != WSAEINPROGRESS &&
691 WSAGetLastError() != WSAECONNRESET &&
692 WSAGetLastError()!= WSAEWOULDBLOCK)
693 log_err("recvfrom failed: %s",
694 wsa_strerror(WSAGetLastError()));
698 sldns_buffer_skip(rep.c->buffer, rcv);
699 sldns_buffer_flip(rep.c->buffer);
701 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
702 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
703 /* send back immediate reply */
704 (void)comm_point_send_udp_msg(rep.c, rep.c->buffer,
705 (struct sockaddr*)&rep.addr, rep.addrlen);
707 if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
708 another UDP port. Note rep.c cannot be reused with TCP fd. */
713 /** Use a new tcp handler for new query fd, set to read query */
715 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max)
717 log_assert(c->type == comm_tcp);
718 log_assert(c->fd == -1);
719 sldns_buffer_clear(c->buffer);
720 c->tcp_is_reading = 1;
721 c->tcp_byte_count = 0;
722 c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
723 /* if more than half the tcp handlers are in use, use a shorter
724 * timeout for this TCP connection, we need to make space for
725 * other connections to be able to get attention */
727 c->tcp_timeout_msec = TCP_QUERY_TIMEOUT_FAST;
728 comm_point_start_listening(c, fd, c->tcp_timeout_msec);
731 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
732 short ATTR_UNUSED(event), void* arg)
734 struct comm_base* b = (struct comm_base*)arg;
735 /* timeout for the slow accept, re-enable accepts again */
736 if(b->start_accept) {
737 verbose(VERB_ALGO, "wait is over, slow accept disabled");
738 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
739 (*b->start_accept)(b->cb_arg);
740 b->eb->slow_accept_enabled = 0;
/* NOTE(review): decimated excerpt - new_fd/tv declarations, braces,
 * returns and several string-continuation lines are missing from view.
 * Code kept byte-identical; comments only.
 * Visible intent: accept() a connection on c->fd; squelch transient
 * errors; on ENFILE/EMFILE start the slow-accept backoff (stop
 * accepting, arm a NETEVENT_SLOW_ACCEPT_TIME timer); on success set the
 * new fd nonblocking. */
744 int comm_point_perform_accept(struct comm_point* c,
745 struct sockaddr_storage* addr, socklen_t* addrlen)
748 *addrlen = (socklen_t)sizeof(*addr);
749 new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
752 /* EINTR is signal interrupt. others are closed connection. */
753 if( errno == EINTR || errno == EAGAIN
755 || errno == EWOULDBLOCK
758 || errno == ECONNABORTED
765 #if defined(ENFILE) && defined(EMFILE)
766 if(errno == ENFILE || errno == EMFILE) {
767 /* out of file descriptors, likely outside of our
768 * control. stop accept() calls for some time */
769 if(c->ev->base->stop_accept) {
770 struct comm_base* b = c->ev->base;
772 verbose(VERB_ALGO, "out of file descriptors: "
774 b->eb->slow_accept_enabled = 1;
775 fptr_ok(fptr_whitelist_stop_accept(
777 (*b->stop_accept)(b->cb_arg);
778 /* set timeout, no mallocs */
779 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
780 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
781 b->eb->slow_accept = ub_event_new(b->eb->base,
783 comm_base_handle_slow_accept, b);
784 if(b->eb->slow_accept == NULL) {
785 /* we do not want to log here, because
786 * that would spam the logfiles.
787 * error: "event_base_set failed." */
789 else if(ub_event_add(b->eb->slow_accept, &tv)
791 /* we do not want to log here,
792 * error: "event_add failed." */
798 log_err_addr("accept failed", strerror(errno), addr, *addrlen);
799 #else /* USE_WINSOCK */
800 if(WSAGetLastError() == WSAEINPROGRESS ||
801 WSAGetLastError() == WSAECONNRESET)
803 if(WSAGetLastError() == WSAEWOULDBLOCK) {
804 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
807 log_err_addr("accept failed", wsa_strerror(WSAGetLastError()),
812 fd_set_nonblock(new_fd);
/* NOTE(review): decimated excerpt - the surrounding #ifdef (winsock +
 * SSL) markers, braces and the final return are missing from view.
 * Code kept byte-identical; comments only.
 * Visible intent: OpenSSL BIO callback that, on Windows, re-arms the
 * event when a BIO read/write returned 0 with WSAEWOULDBLOCK. */
817 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
818 int ATTR_UNUSED(argi), long argl, long retvalue)
820 verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
821 (oper&BIO_CB_RETURN)?"return":"before",
822 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
823 WSAGetLastError()==WSAEWOULDBLOCK?"wsawb":"");
824 /* on windows, check if previous operation caused EWOULDBLOCK */
825 if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
826 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
827 if(WSAGetLastError() == WSAEWOULDBLOCK)
828 ub_winsock_tcp_wouldblock((struct ub_event*)
829 BIO_get_callback_arg(b), UB_EV_READ);
831 if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
832 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
833 if(WSAGetLastError() == WSAEWOULDBLOCK)
834 ub_winsock_tcp_wouldblock((struct ub_event*)
835 BIO_get_callback_arg(b), UB_EV_WRITE);
837 /* return original return value */
841 /** set win bio callbacks for nonblocking operations */
843 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
845 SSL* ssl = (SSL*)thessl;
846 /* set them both just in case, but usually they are the same BIO */
847 BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
848 BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
849 BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
850 BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
/* NOTE(review): decimated excerpt - new_fd declaration, braces, the
 * free-handler lookup and #ifdef HAVE_SSL markers are missing from
 * view.  Code kept byte-identical; comments only.
 * Visible intent: accept event on a comm_tcp_accept point: take a free
 * tcp handler, accept the connection, optionally wrap it in TLS, stop
 * accepting when handlers run out, and hand the fd to
 * setup_tcp_handler(). */
855 comm_point_tcp_accept_callback(int fd, short event, void* arg)
857 struct comm_point* c = (struct comm_point*)arg, *c_hdl;
859 log_assert(c->type == comm_tcp_accept);
860 if(!(event & UB_EV_READ)) {
861 log_info("ignoring tcp accept event %d", (int)event);
864 ub_comm_base_now(c->ev->base);
865 /* find free tcp handler. */
867 log_warn("accepted too many tcp, connections full");
870 /* accept incoming connection. */
872 log_assert(fd != -1);
874 new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
875 &c_hdl->repinfo.addrlen);
879 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
882 comm_point_close(c_hdl);
885 c_hdl->ssl_shake_state = comm_ssl_shake_read;
887 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
891 /* grab the tcp handler buffers */
893 c->tcp_free = c_hdl->tcp_free;
895 /* stop accepting incoming queries for now. */
896 comm_point_stop_listening(c);
898 setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count);
901 /** Make tcp handler free for next assignment */
903 reclaim_tcp_handler(struct comm_point* c)
905 log_assert(c->type == comm_tcp);
908 SSL_shutdown(c->ssl);
915 c->tcp_parent->cur_tcp_count--;
916 c->tcp_free = c->tcp_parent->tcp_free;
917 c->tcp_parent->tcp_free = c;
919 /* re-enable listening on accept socket */
920 comm_point_start_listening(c->tcp_parent, -1, -1);
925 /** do the callback when writing is done */
927 tcp_callback_writer(struct comm_point* c)
929 log_assert(c->type == comm_tcp);
930 sldns_buffer_clear(c->buffer);
931 if(c->tcp_do_toggle_rw)
932 c->tcp_is_reading = 1;
933 c->tcp_byte_count = 0;
934 /* switch from listening(write) to listening(read) */
935 comm_point_stop_listening(c);
936 comm_point_start_listening(c, -1, -1);
939 /** do the callback when reading is done */
941 tcp_callback_reader(struct comm_point* c)
943 log_assert(c->type == comm_tcp || c->type == comm_local);
944 sldns_buffer_flip(c->buffer);
945 if(c->tcp_do_toggle_rw)
946 c->tcp_is_reading = 0;
947 c->tcp_byte_count = 0;
948 if(c->type == comm_tcp)
949 comm_point_stop_listening(c);
950 fptr_ok(fptr_whitelist_comm_point(c->callback));
951 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
952 comm_point_start_listening(c, -1, c->tcp_timeout_msec);
956 /** continue ssl handshake */
/* NOTE(review): decimated excerpt - #ifdef HAVE_SSL markers, the r
 * declaration, braces and several return statements are missing from
 * view.  Code kept byte-identical; comments only.
 * Visible intent: drive SSL_do_handshake() to completion across
 * nonblocking events, tracking progress in c->ssl_shake_state and
 * re-registering read/write interest as OpenSSL requests. */
959 ssl_handshake(struct comm_point* c)
962 if(c->ssl_shake_state == comm_ssl_shake_hs_read) {
963 /* read condition satisfied back to writing */
964 comm_point_listen_for_rw(c, 1, 1);
965 c->ssl_shake_state = comm_ssl_shake_none;
968 if(c->ssl_shake_state == comm_ssl_shake_hs_write) {
969 /* write condition satisfied, back to reading */
970 comm_point_listen_for_rw(c, 1, 0);
971 c->ssl_shake_state = comm_ssl_shake_none;
976 r = SSL_do_handshake(c->ssl);
978 int want = SSL_get_error(c->ssl, r);
979 if(want == SSL_ERROR_WANT_READ) {
980 if(c->ssl_shake_state == comm_ssl_shake_read)
982 c->ssl_shake_state = comm_ssl_shake_read;
983 comm_point_listen_for_rw(c, 1, 0);
985 } else if(want == SSL_ERROR_WANT_WRITE) {
986 if(c->ssl_shake_state == comm_ssl_shake_write)
988 c->ssl_shake_state = comm_ssl_shake_write;
989 comm_point_listen_for_rw(c, 0, 1);
992 return 0; /* closed */
993 } else if(want == SSL_ERROR_SYSCALL) {
994 /* SYSCALL and errno==0 means closed uncleanly */
996 log_err("SSL_handshake syscall: %s",
1000 log_crypto_err("ssl handshake failed");
1001 log_addr(1, "ssl handshake failed", &c->repinfo.addr,
1002 c->repinfo.addrlen);
1006 /* this is where peer verification could take place */
1007 log_addr(VERB_ALGO, "SSL DNS connection", &c->repinfo.addr,
1008 c->repinfo.addrlen);
1010 /* setup listen rw correctly */
1011 if(c->tcp_is_reading) {
1012 if(c->ssl_shake_state != comm_ssl_shake_read)
1013 comm_point_listen_for_rw(c, 1, 0);
1015 comm_point_listen_for_rw(c, 1, 1);
1017 c->ssl_shake_state = comm_ssl_shake_none;
1020 #endif /* HAVE_SSL */
1022 /** ssl read callback on TCP */
/* NOTE(review): decimated excerpt - the r declaration, braces, returns
 * and #ifdef markers are missing from view.  Code kept byte-identical;
 * comments only.  Visible intent: finish the handshake if pending, then
 * read the 2-byte DNS-over-TLS length prefix followed by the message
 * body via SSL_read(), handling WANT_READ/WANT_WRITE renegotiation;
 * calls tcp_callback_reader() when the message is complete. */
1024 ssl_handle_read(struct comm_point* c)
1028 if(c->ssl_shake_state != comm_ssl_shake_none) {
1029 if(!ssl_handshake(c))
1031 if(c->ssl_shake_state != comm_ssl_shake_none)
1034 if(c->tcp_byte_count < sizeof(uint16_t)) {
1035 /* read length bytes */
1037 if((r=SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer,
1038 c->tcp_byte_count), (int)(sizeof(uint16_t) -
1039 c->tcp_byte_count))) <= 0) {
1040 int want = SSL_get_error(c->ssl, r);
1041 if(want == SSL_ERROR_ZERO_RETURN) {
1042 return 0; /* shutdown, closed */
1043 } else if(want == SSL_ERROR_WANT_READ) {
1044 return 1; /* read more later */
1045 } else if(want == SSL_ERROR_WANT_WRITE) {
1046 c->ssl_shake_state = comm_ssl_shake_hs_write;
1047 comm_point_listen_for_rw(c, 0, 1);
1049 } else if(want == SSL_ERROR_SYSCALL) {
1051 log_err("SSL_read syscall: %s",
1055 log_crypto_err("could not SSL_read");
1058 c->tcp_byte_count += r;
1059 if(c->tcp_byte_count != sizeof(uint16_t))
1061 if(sldns_buffer_read_u16_at(c->buffer, 0) >
1062 sldns_buffer_capacity(c->buffer)) {
1063 verbose(VERB_QUERY, "ssl: dropped larger than buffer");
1066 sldns_buffer_set_limit(c->buffer,
1067 sldns_buffer_read_u16_at(c->buffer, 0));
1068 if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1069 verbose(VERB_QUERY, "ssl: dropped bogus too short.");
1072 verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
1073 (int)sldns_buffer_limit(c->buffer));
1075 log_assert(sldns_buffer_remaining(c->buffer) > 0);
1077 r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
1078 (int)sldns_buffer_remaining(c->buffer));
1080 int want = SSL_get_error(c->ssl, r);
1081 if(want == SSL_ERROR_ZERO_RETURN) {
1082 return 0; /* shutdown, closed */
1083 } else if(want == SSL_ERROR_WANT_READ) {
1084 return 1; /* read more later */
1085 } else if(want == SSL_ERROR_WANT_WRITE) {
1086 c->ssl_shake_state = comm_ssl_shake_hs_write;
1087 comm_point_listen_for_rw(c, 0, 1);
1089 } else if(want == SSL_ERROR_SYSCALL) {
1091 log_err("SSL_read syscall: %s",
1095 log_crypto_err("could not SSL_read");
1098 sldns_buffer_skip(c->buffer, (ssize_t)r);
1099 if(sldns_buffer_remaining(c->buffer) <= 0) {
1100 tcp_callback_reader(c);
1106 #endif /* HAVE_SSL */
1109 /** ssl write callback on TCP */
/* NOTE(review): decimated excerpt - the r declaration, braces, returns
 * and #ifdef markers are missing from view.  Code kept byte-identical;
 * comments only.  Visible intent: finish the handshake if pending, then
 * write the 2-byte length prefix followed by the message body via
 * SSL_write() (partial writes enabled), handling WANT_READ/WANT_WRITE;
 * calls tcp_callback_writer() when everything is sent. */
1111 ssl_handle_write(struct comm_point* c)
1115 if(c->ssl_shake_state != comm_ssl_shake_none) {
1116 if(!ssl_handshake(c))
1118 if(c->ssl_shake_state != comm_ssl_shake_none)
1121 /* ignore return, if fails we may simply block */
1122 (void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
1123 if(c->tcp_byte_count < sizeof(uint16_t)) {
1124 uint16_t len = htons(sldns_buffer_limit(c->buffer));
1126 r = SSL_write(c->ssl,
1127 (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1128 (int)(sizeof(uint16_t)-c->tcp_byte_count));
1130 int want = SSL_get_error(c->ssl, r);
1131 if(want == SSL_ERROR_ZERO_RETURN) {
1132 return 0; /* closed */
1133 } else if(want == SSL_ERROR_WANT_READ) {
1134 c->ssl_shake_state = comm_ssl_shake_read;
1135 comm_point_listen_for_rw(c, 1, 0);
1136 return 1; /* wait for read condition */
1137 } else if(want == SSL_ERROR_WANT_WRITE) {
1138 return 1; /* write more later */
1139 } else if(want == SSL_ERROR_SYSCALL) {
1141 log_err("SSL_write syscall: %s",
1145 log_crypto_err("could not SSL_write");
1148 c->tcp_byte_count += r;
1149 if(c->tcp_byte_count < sizeof(uint16_t))
1151 sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
1153 if(sldns_buffer_remaining(c->buffer) == 0) {
1154 tcp_callback_writer(c);
1158 log_assert(sldns_buffer_remaining(c->buffer) > 0);
1160 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
1161 (int)sldns_buffer_remaining(c->buffer));
1163 int want = SSL_get_error(c->ssl, r);
1164 if(want == SSL_ERROR_ZERO_RETURN) {
1165 return 0; /* closed */
1166 } else if(want == SSL_ERROR_WANT_READ) {
1167 c->ssl_shake_state = comm_ssl_shake_read;
1168 comm_point_listen_for_rw(c, 1, 0);
1169 return 1; /* wait for read condition */
1170 } else if(want == SSL_ERROR_WANT_WRITE) {
1171 return 1; /* write more later */
1172 } else if(want == SSL_ERROR_SYSCALL) {
1174 log_err("SSL_write syscall: %s",
1178 log_crypto_err("could not SSL_write");
1181 sldns_buffer_skip(c->buffer, (ssize_t)r);
1183 if(sldns_buffer_remaining(c->buffer) == 0) {
1184 tcp_callback_writer(c);
1190 #endif /* HAVE_SSL */
1193 /** handle ssl tcp connection with dns contents */
1195 ssl_handle_it(struct comm_point* c)
1197 if(c->tcp_is_reading)
1198 return ssl_handle_read(c);
1199 return ssl_handle_write(c);
1202 /** Handle tcp reading callback.
1203 * @param fd: file descriptor of socket.
1204 * @param c: comm point to read from into buffer.
1205 * @param short_ok: if true, very short packets are OK (for comm_local).
1206 * @return: 0 on error
/* NOTE(review): decimated excerpt - the r declaration, braces, returns
 * and #ifdef markers are missing from view.  Code kept byte-identical;
 * comments only.  Visible intent: plaintext counterpart of
 * ssl_handle_read(): read the 2-byte length prefix then the message via
 * recv(), squelching transient errnos; calls tcp_callback_reader()
 * when complete.  Delegates to ssl_handle_it() for TLS connections. */
1209 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1212 log_assert(c->type == comm_tcp || c->type == comm_local);
1214 return ssl_handle_it(c);
1215 if(!c->tcp_is_reading)
1218 log_assert(fd != -1);
1219 if(c->tcp_byte_count < sizeof(uint16_t)) {
1220 /* read length bytes */
1221 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1222 sizeof(uint16_t)-c->tcp_byte_count, 0);
1227 if(errno == EINTR || errno == EAGAIN)
1230 if(errno == ECONNRESET && verbosity < 2)
1231 return 0; /* silence reset by peer */
1233 log_err_addr("read (in tcp s)", strerror(errno),
1234 &c->repinfo.addr, c->repinfo.addrlen);
1235 #else /* USE_WINSOCK */
1236 if(WSAGetLastError() == WSAECONNRESET)
1238 if(WSAGetLastError() == WSAEINPROGRESS)
1240 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1241 ub_winsock_tcp_wouldblock(c->ev->ev,
1245 log_err_addr("read (in tcp s)",
1246 wsa_strerror(WSAGetLastError()),
1247 &c->repinfo.addr, c->repinfo.addrlen);
1251 c->tcp_byte_count += r;
1252 if(c->tcp_byte_count != sizeof(uint16_t))
1254 if(sldns_buffer_read_u16_at(c->buffer, 0) >
1255 sldns_buffer_capacity(c->buffer)) {
1256 verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1259 sldns_buffer_set_limit(c->buffer,
1260 sldns_buffer_read_u16_at(c->buffer, 0));
1262 sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1263 verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1266 verbose(VERB_ALGO, "Reading tcp query of length %d",
1267 (int)sldns_buffer_limit(c->buffer));
1270 log_assert(sldns_buffer_remaining(c->buffer) > 0);
1271 r = recv(fd, (void*)sldns_buffer_current(c->buffer),
1272 sldns_buffer_remaining(c->buffer), 0);
1275 } else if(r == -1) {
1277 if(errno == EINTR || errno == EAGAIN)
1279 log_err_addr("read (in tcp r)", strerror(errno),
1280 &c->repinfo.addr, c->repinfo.addrlen);
1281 #else /* USE_WINSOCK */
1282 if(WSAGetLastError() == WSAECONNRESET)
1284 if(WSAGetLastError() == WSAEINPROGRESS)
1286 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1287 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
1290 log_err_addr("read (in tcp r)",
1291 wsa_strerror(WSAGetLastError()),
1292 &c->repinfo.addr, c->repinfo.addrlen);
1296 sldns_buffer_skip(c->buffer, r);
1297 if(sldns_buffer_remaining(c->buffer) <= 0) {
1298 tcp_callback_reader(c);
1304 * Handle tcp writing callback.
1305 * @param fd: file descriptor of socket.
1306 * @param c: comm point to write buffer out of.
1307 * @return: 0 on error
/* Writes the 2-byte length prefix and then the buffer contents.
 * Also finishes nonblocking connect()s, optionally via TCP Fast Open.
 * Transient errors presumably return 1 to retry later; elided lines
 * should be confirmed against the full file. */
1310 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1313 log_assert(c->type == comm_tcp);
1314 if(c->tcp_is_reading && !c->ssl)
1316 log_assert(fd != -1);
/* On first write of an outgoing connection, collect the pending
 * error from the nonblocking connect via SO_ERROR. */
1317 if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1318 /* check for pending error from nonblocking connect */
1319 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1321 socklen_t len = (socklen_t)sizeof(error);
1322 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error,
1325 error = errno; /* on solaris errno is error */
1326 #else /* USE_WINSOCK */
1327 error = WSAGetLastError();
1331 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1332 if(error == EINPROGRESS || error == EWOULDBLOCK)
1333 return 1; /* try again later */
1336 if(error != 0 && verbosity < 2)
1337 return 0; /* silence lots of chatter in the logs */
1338 else if(error != 0) {
1339 log_err_addr("tcp connect", strerror(error),
1340 &c->repinfo.addr, c->repinfo.addrlen);
1341 #else /* USE_WINSOCK */
1343 if(error == WSAEINPROGRESS)
1345 else if(error == WSAEWOULDBLOCK) {
1346 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1348 } else if(error != 0 && verbosity < 2)
1350 else if(error != 0) {
1351 log_err_addr("tcp connect", wsa_strerror(error),
1352 &c->repinfo.addr, c->repinfo.addrlen);
1353 #endif /* USE_WINSOCK */
/* SSL connections are written via the SSL handlers. */
1358 return ssl_handle_it(c);
1360 #ifdef USE_MSG_FASTOPEN
1361 /* Only try this on first use of a connection that uses tfo,
1362 otherwise fall through to normal write */
1363 /* Also, TFO support on WINDOWS not implemented at the moment */
1364 if(c->tcp_do_fastopen == 1) {
1365 /* this form of sendmsg() does both a connect() and send() so need to
1366 look for various flavours of error*/
1367 uint16_t len = htons(sldns_buffer_limit(c->buffer));
1369 struct iovec iov[2];
1370 c->tcp_do_fastopen = 0;
1371 memset(&msg, 0, sizeof(msg));
/* iov[0] is the (remaining part of the) length prefix, iov[1] the
 * message payload; sent together in one sendmsg with MSG_FASTOPEN. */
1372 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1373 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1374 iov[1].iov_base = sldns_buffer_begin(c->buffer);
1375 iov[1].iov_len = sldns_buffer_limit(c->buffer);
1376 log_assert(iov[0].iov_len > 0);
1377 log_assert(iov[1].iov_len > 0);
1378 msg.msg_name = &c->repinfo.addr;
1379 msg.msg_namelen = c->repinfo.addrlen;
1382 r = sendmsg(fd, &msg, MSG_FASTOPEN);
1384 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1385 /* Handshake is underway, maybe because no TFO cookie available.
1386 Come back to write the messsage*/
1387 if(errno == EINPROGRESS || errno == EWOULDBLOCK)
1390 if(errno == EINTR || errno == EAGAIN)
1392 /* Not handling EISCONN here as shouldn't ever hit that case.*/
1393 if(errno != 0 && verbosity < 2)
1394 return 0; /* silence lots of chatter in the logs */
1396 log_err_addr("tcp sendmsg", strerror(errno),
1397 &c->repinfo.addr, c->repinfo.addrlen);
/* Account for what was sent; reposition the buffer past the part
 * of the payload already written. */
1400 c->tcp_byte_count += r;
1401 if(c->tcp_byte_count < sizeof(uint16_t))
1403 sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
1405 if(sldns_buffer_remaining(c->buffer) == 0) {
1406 tcp_callback_writer(c);
1411 #endif /* USE_MSG_FASTOPEN */
/* Normal path: length prefix not yet fully written; send prefix and
 * payload together with writev when available. */
1413 if(c->tcp_byte_count < sizeof(uint16_t)) {
1414 uint16_t len = htons(sldns_buffer_limit(c->buffer));
1416 struct iovec iov[2];
1417 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1418 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1419 iov[1].iov_base = sldns_buffer_begin(c->buffer);
1420 iov[1].iov_len = sldns_buffer_limit(c->buffer);
1421 log_assert(iov[0].iov_len > 0);
1422 log_assert(iov[1].iov_len > 0);
1423 r = writev(fd, iov, 2);
1424 #else /* HAVE_WRITEV */
1425 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1426 sizeof(uint16_t)-c->tcp_byte_count, 0);
1427 #endif /* HAVE_WRITEV */
1431 if(errno == EPIPE && verbosity < 2)
1432 return 0; /* silence 'broken pipe' */
1434 if(errno == EINTR || errno == EAGAIN)
1437 log_err_addr("tcp writev", strerror(errno),
1438 &c->repinfo.addr, c->repinfo.addrlen);
1439 # else /* HAVE_WRITEV */
1440 log_err_addr("tcp send s", strerror(errno),
1441 &c->repinfo.addr, c->repinfo.addrlen);
1442 # endif /* HAVE_WRITEV */
1444 if(WSAGetLastError() == WSAENOTCONN)
1446 if(WSAGetLastError() == WSAEINPROGRESS)
1448 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1449 ub_winsock_tcp_wouldblock(c->ev->ev,
1453 log_err_addr("tcp send s",
1454 wsa_strerror(WSAGetLastError()),
1455 &c->repinfo.addr, c->repinfo.addrlen);
1459 c->tcp_byte_count += r;
1460 if(c->tcp_byte_count < sizeof(uint16_t))
1462 sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
1464 if(sldns_buffer_remaining(c->buffer) == 0) {
1465 tcp_callback_writer(c);
/* Prefix already written: send the remaining payload bytes. */
1469 log_assert(sldns_buffer_remaining(c->buffer) > 0);
1470 r = send(fd, (void*)sldns_buffer_current(c->buffer),
1471 sldns_buffer_remaining(c->buffer), 0);
1474 if(errno == EINTR || errno == EAGAIN)
1476 log_err_addr("tcp send r", strerror(errno),
1477 &c->repinfo.addr, c->repinfo.addrlen);
1479 if(WSAGetLastError() == WSAEINPROGRESS)
1481 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1482 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1485 log_err_addr("tcp send r", wsa_strerror(WSAGetLastError()),
1486 &c->repinfo.addr, c->repinfo.addrlen);
/* Advance past what was sent; notify the writer callback when done. */
1490 sldns_buffer_skip(c->buffer, r);
1492 if(sldns_buffer_remaining(c->buffer) == 0) {
1493 tcp_callback_writer(c);
/* Event-loop callback for a TCP comm point: dispatches read, write and
 * timeout events. On handler failure (return 0) the handler slot is
 * reclaimed and, unless tcp_do_close is set, the user callback is
 * invoked with NETEVENT_CLOSED (or NETEVENT_TIMEOUT for timeouts). */
1500 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1502 struct comm_point* c = (struct comm_point*)arg;
1503 log_assert(c->type == comm_tcp);
/* refresh the cached time for this event base */
1504 ub_comm_base_now(c->ev->base);
1506 if(event&UB_EV_READ) {
1507 if(!comm_point_tcp_handle_read(fd, c, 0)) {
1508 reclaim_tcp_handler(c);
1509 if(!c->tcp_do_close) {
1510 fptr_ok(fptr_whitelist_comm_point(
1512 (void)(*c->callback)(c, c->cb_arg,
1513 NETEVENT_CLOSED, NULL);
1518 if(event&UB_EV_WRITE) {
1519 if(!comm_point_tcp_handle_write(fd, c)) {
1520 reclaim_tcp_handler(c);
1521 if(!c->tcp_do_close) {
1522 fptr_ok(fptr_whitelist_comm_point(
1524 (void)(*c->callback)(c, c->cb_arg,
1525 NETEVENT_CLOSED, NULL);
1530 if(event&UB_EV_TIMEOUT) {
1531 verbose(VERB_QUERY, "tcp took too long, dropped");
1532 reclaim_tcp_handler(c);
1533 if(!c->tcp_do_close) {
1534 fptr_ok(fptr_whitelist_comm_point(c->callback));
1535 (void)(*c->callback)(c, c->cb_arg,
1536 NETEVENT_TIMEOUT, NULL);
1540 log_err("Ignored event %d for tcphdl.", event);
/* Event-loop callback for a comm_local (e.g. unix/local socket) point:
 * reads with short_ok=1 (very short packets allowed) and reports
 * NETEVENT_CLOSED to the user callback on read failure. */
1543 void comm_point_local_handle_callback(int fd, short event, void* arg)
1545 struct comm_point* c = (struct comm_point*)arg;
1546 log_assert(c->type == comm_local);
1547 ub_comm_base_now(c->ev->base);
1549 if(event&UB_EV_READ) {
1550 if(!comm_point_tcp_handle_read(fd, c, 1)) {
1551 fptr_ok(fptr_whitelist_comm_point(c->callback));
1552 (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED,
1557 log_err("Ignored event %d for localhdl.", event);
/* Event-loop callback for a raw comm point: passes the event straight
 * through to the user callback, translating a timeout event into
 * NETEVENT_TIMEOUT and everything else into NETEVENT_NOERROR. */
1560 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd),
1561 short event, void* arg)
1563 struct comm_point* c = (struct comm_point*)arg;
1564 int err = NETEVENT_NOERROR;
1565 log_assert(c->type == comm_raw);
1566 ub_comm_base_now(c->ev->base);
1568 if(event&UB_EV_TIMEOUT)
1569 err = NETEVENT_TIMEOUT;
1570 fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
1571 (*c->callback)(c, c->cb_arg, err, NULL);
/* Create a UDP comm point on fd with the given buffer and callback.
 * Registers a persistent read event; returns NULL on allocation or
 * event-registration failure (cleaning up via comm_point_delete). */
1575 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
1576 comm_point_callback_type* callback, void* callback_arg)
1578 struct comm_point* c = (struct comm_point*)calloc(1,
1579 sizeof(struct comm_point));
1583 c->ev = (struct internal_event*)calloc(1,
1584 sizeof(struct internal_event));
/* TCP-related fields are unused for UDP; cleared explicitly. */
1593 c->tcp_is_reading = 0;
1594 c->tcp_byte_count = 0;
1595 c->tcp_parent = NULL;
1596 c->max_tcp_count = 0;
1597 c->cur_tcp_count = 0;
1598 c->tcp_handlers = NULL;
1601 c->tcp_do_close = 0;
1602 c->do_not_close = 0;
1603 c->tcp_do_toggle_rw = 0;
1604 c->tcp_check_nb_connect = 0;
1605 #ifdef USE_MSG_FASTOPEN
1606 c->tcp_do_fastopen = 0;
1609 c->callback = callback;
1610 c->cb_arg = callback_arg;
1611 evbits = UB_EV_READ | UB_EV_PERSIST;
1612 /* ub_event stuff */
1613 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1614 comm_point_udp_callback, c);
1615 if(c->ev->ev == NULL) {
1616 log_err("could not baseset udp event");
1617 comm_point_delete(c);
/* fd may be -1 for a not-yet-open point; only then skip event_add. */
1620 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
1621 log_err("could not add udp event");
1622 comm_point_delete(c);
/* Create a UDP comm point that uses the ancillary-data callback
 * (comm_point_udp_ancil_callback, e.g. for source-address info).
 * Otherwise identical in structure to comm_point_create_udp. */
1629 comm_point_create_udp_ancil(struct comm_base *base, int fd,
1630 sldns_buffer* buffer,
1631 comm_point_callback_type* callback, void* callback_arg)
1633 struct comm_point* c = (struct comm_point*)calloc(1,
1634 sizeof(struct comm_point));
1638 c->ev = (struct internal_event*)calloc(1,
1639 sizeof(struct internal_event));
/* TCP-related fields are unused for UDP; cleared explicitly. */
1648 c->tcp_is_reading = 0;
1649 c->tcp_byte_count = 0;
1650 c->tcp_parent = NULL;
1651 c->max_tcp_count = 0;
1652 c->cur_tcp_count = 0;
1653 c->tcp_handlers = NULL;
1656 c->tcp_do_close = 0;
1657 c->do_not_close = 0;
1659 c->tcp_do_toggle_rw = 0;
1660 c->tcp_check_nb_connect = 0;
1661 #ifdef USE_MSG_FASTOPEN
1662 c->tcp_do_fastopen = 0;
1664 c->callback = callback;
1665 c->cb_arg = callback_arg;
1666 evbits = UB_EV_READ | UB_EV_PERSIST;
1667 /* ub_event stuff */
1668 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1669 comm_point_udp_ancil_callback, c);
1670 if(c->ev->ev == NULL) {
1671 log_err("could not baseset udp event");
1672 comm_point_delete(c);
1675 if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
1676 log_err("could not add udp event");
1677 comm_point_delete(c);
/* Allocate one TCP handler comm point for accepted connections of
 * `parent`, with its own buffer and timeout, and link it into the
 * parent's free list. Returns NULL on failure (some cleanup lines are
 * elided in this view — confirm against full file). */
1683 static struct comm_point*
1684 comm_point_create_tcp_handler(struct comm_base *base,
1685 struct comm_point* parent, size_t bufsize,
1686 comm_point_callback_type* callback, void* callback_arg)
1688 struct comm_point* c = (struct comm_point*)calloc(1,
1689 sizeof(struct comm_point));
1693 c->ev = (struct internal_event*)calloc(1,
1694 sizeof(struct internal_event));
1701 c->buffer = sldns_buffer_new(bufsize);
/* per-connection timeout storage; freed with the comm point */
1707 c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
1709 sldns_buffer_free(c->buffer);
1714 c->tcp_is_reading = 0;
1715 c->tcp_byte_count = 0;
1716 c->tcp_parent = parent;
1717 c->max_tcp_count = 0;
1718 c->cur_tcp_count = 0;
1719 c->tcp_handlers = NULL;
1722 c->tcp_do_close = 0;
1723 c->do_not_close = 0;
/* server side toggles between read (query) and write (answer) */
1724 c->tcp_do_toggle_rw = 1;
1725 c->tcp_check_nb_connect = 0;
1726 #ifdef USE_MSG_FASTOPEN
1727 c->tcp_do_fastopen = 0;
1730 c->callback = callback;
1731 c->cb_arg = callback_arg;
1732 /* add to parent free list */
1733 c->tcp_free = parent->tcp_free;
1734 parent->tcp_free = c;
1735 /* ub_event stuff */
1736 evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
1737 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1738 comm_point_tcp_handle_callback, c);
1739 if(c->ev->ev == NULL)
1741 log_err("could not basetset tcphdl event");
/* undo the free-list insertion on failure */
1742 parent->tcp_free = c->tcp_free;
/* Create a TCP accept comm point on fd plus `num` preallocated handler
 * comm points (each of bufsize) for accepted connections. Returns NULL
 * on any failure, deleting the partially built structure. */
1751 comm_point_create_tcp(struct comm_base *base, int fd, int num, size_t bufsize,
1752 comm_point_callback_type* callback, void* callback_arg)
1754 struct comm_point* c = (struct comm_point*)calloc(1,
1755 sizeof(struct comm_point));
1758 /* first allocate the TCP accept listener */
1761 c->ev = (struct internal_event*)calloc(1,
1762 sizeof(struct internal_event));
1771 c->tcp_is_reading = 0;
1772 c->tcp_byte_count = 0;
1773 c->tcp_parent = NULL;
1774 c->max_tcp_count = num;
1775 c->cur_tcp_count = 0;
/* one slot per preallocated handler */
1776 c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
1777 sizeof(struct comm_point*));
1778 if(!c->tcp_handlers) {
1784 c->type = comm_tcp_accept;
1785 c->tcp_do_close = 0;
1786 c->do_not_close = 0;
1787 c->tcp_do_toggle_rw = 0;
1788 c->tcp_check_nb_connect = 0;
1789 #ifdef USE_MSG_FASTOPEN
1790 c->tcp_do_fastopen = 0;
1794 evbits = UB_EV_READ | UB_EV_PERSIST;
1795 /* ub_event stuff */
1796 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1797 comm_point_tcp_accept_callback, c);
1798 if(c->ev->ev == NULL) {
1799 log_err("could not baseset tcpacc event");
1800 comm_point_delete(c);
1803 if (ub_event_add(c->ev->ev, c->timeout) != 0) {
1804 log_err("could not add tcpacc event");
1805 comm_point_delete(c);
1808 /* now prealloc the tcp handlers */
1809 for(i=0; i<num; i++) {
1810 c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
1811 c, bufsize, callback, callback_arg);
1812 if(!c->tcp_handlers[i]) {
/* comm_point_delete also deletes handlers created so far */
1813 comm_point_delete(c);
/* Create an outgoing TCP comm point (no fd yet): checks the
 * nonblocking connect on first write, uses TCP Fast Open when built
 * with USE_MSG_FASTOPEN, and starts with a persistent write event. */
1822 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
1823 comm_point_callback_type* callback, void* callback_arg)
1825 struct comm_point* c = (struct comm_point*)calloc(1,
1826 sizeof(struct comm_point));
1830 c->ev = (struct internal_event*)calloc(1,
1831 sizeof(struct internal_event));
1838 c->buffer = sldns_buffer_new(bufsize);
1845 c->tcp_is_reading = 0;
1846 c->tcp_byte_count = 0;
1847 c->tcp_parent = NULL;
1848 c->max_tcp_count = 0;
1849 c->cur_tcp_count = 0;
1850 c->tcp_handlers = NULL;
1853 c->tcp_do_close = 0;
1854 c->do_not_close = 0;
/* client side toggles write (query) then read (answer) */
1855 c->tcp_do_toggle_rw = 1;
/* outgoing: must verify the nonblocking connect completed */
1856 c->tcp_check_nb_connect = 1;
1857 #ifdef USE_MSG_FASTOPEN
1858 c->tcp_do_fastopen = 1;
1861 c->callback = callback;
1862 c->cb_arg = callback_arg;
1863 evbits = UB_EV_PERSIST | UB_EV_WRITE;
1864 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1865 comm_point_tcp_handle_callback, c);
1866 if(c->ev->ev == NULL)
1868 log_err("could not baseset tcpout event");
1869 sldns_buffer_free(c->buffer);
/* Create a comm_local point on an already-open fd (which is not closed
 * by this comm point: do_not_close=1). Starts in reading mode with a
 * persistent read event; returns NULL on failure. */
1879 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
1880 comm_point_callback_type* callback, void* callback_arg)
1882 struct comm_point* c = (struct comm_point*)calloc(1,
1883 sizeof(struct comm_point));
1887 c->ev = (struct internal_event*)calloc(1,
1888 sizeof(struct internal_event));
1895 c->buffer = sldns_buffer_new(bufsize);
/* local points only read requests; start in reading state */
1902 c->tcp_is_reading = 1;
1903 c->tcp_byte_count = 0;
1904 c->tcp_parent = NULL;
1905 c->max_tcp_count = 0;
1906 c->cur_tcp_count = 0;
1907 c->tcp_handlers = NULL;
1909 c->type = comm_local;
1910 c->tcp_do_close = 0;
/* the caller owns the fd; comm_point_close must not close it */
1911 c->do_not_close = 1;
1912 c->tcp_do_toggle_rw = 0;
1913 c->tcp_check_nb_connect = 0;
1914 #ifdef USE_MSG_FASTOPEN
1915 c->tcp_do_fastopen = 0;
1917 c->callback = callback;
1918 c->cb_arg = callback_arg;
1919 /* ub_event stuff */
1920 evbits = UB_EV_PERSIST | UB_EV_READ;
1921 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1922 comm_point_local_handle_callback, c);
1923 if(c->ev->ev == NULL) {
1924 log_err("could not baseset localhdl event");
1929 if (ub_event_add(c->ev->ev, c->timeout) != 0) {
1930 log_err("could not add localhdl event");
1931 ub_event_free(c->ev->ev);
/* Create a raw comm point on fd that watches for readability or
 * writability (per `writing`) and forwards events unprocessed to the
 * callback. The fd is not closed by this comm point (do_not_close=1). */
1940 comm_point_create_raw(struct comm_base* base, int fd, int writing,
1941 comm_point_callback_type* callback, void* callback_arg)
1943 struct comm_point* c = (struct comm_point*)calloc(1,
1944 sizeof(struct comm_point));
1948 c->ev = (struct internal_event*)calloc(1,
1949 sizeof(struct internal_event));
1958 c->tcp_is_reading = 0;
1959 c->tcp_byte_count = 0;
1960 c->tcp_parent = NULL;
1961 c->max_tcp_count = 0;
1962 c->cur_tcp_count = 0;
1963 c->tcp_handlers = NULL;
1966 c->tcp_do_close = 0;
/* caller retains ownership of the fd */
1967 c->do_not_close = 1;
1968 c->tcp_do_toggle_rw = 0;
1969 c->tcp_check_nb_connect = 0;
1970 #ifdef USE_MSG_FASTOPEN
1971 c->tcp_do_fastopen = 0;
1973 c->callback = callback;
1974 c->cb_arg = callback_arg;
1975 /* ub_event stuff */
/* watch for write when `writing`, otherwise for read */
1977 evbits = UB_EV_PERSIST | UB_EV_WRITE;
1978 else evbits = UB_EV_PERSIST | UB_EV_READ;
1979 c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1980 comm_point_raw_handle_callback, c);
1981 if(c->ev->ev == NULL) {
1982 log_err("could not baseset rawhdl event");
1987 if (ub_event_add(c->ev->ev, c->timeout) != 0) {
1988 log_err("could not add rawhdl event");
1989 ub_event_free(c->ev->ev);
/* Deregister the comm point's event and close its fd (unless
 * do_not_close is set, in which case the fd is left to its owner). */
1998 comm_point_close(struct comm_point* c)
2003 if(ub_event_del(c->ev->ev) != 0) {
2004 log_err("could not event_del on close");
2006 /* close fd after removing from event lists, or epoll.. is messed up */
2007 if(c->fd != -1 && !c->do_not_close) {
2008 verbose(VERB_ALGO, "close fd %d", c->fd);
/* Destroy a comm point: shut down SSL if present, close the fd,
 * recursively delete any preallocated TCP handlers, free the buffer
 * (tcp/local only) and the event structures. */
2019 comm_point_delete(struct comm_point* c)
2023 if(c->type == comm_tcp && c->ssl) {
2025 SSL_shutdown(c->ssl);
2029 comm_point_close(c);
2030 if(c->tcp_handlers) {
2032 for(i=0; i<c->max_tcp_count; i++)
2033 comm_point_delete(c->tcp_handlers[i]);
2034 free(c->tcp_handlers);
/* only tcp/local comm points own their buffer */
2037 if(c->type == comm_tcp || c->type == comm_local)
2038 sldns_buffer_free(c->buffer);
2039 ub_event_free(c->ev->ev);
/* Send the answer in repinfo->c->buffer back to the client. For UDP
 * the datagram is sent immediately (via the interface-aware variant
 * when source-address info was recorded); for TCP the point is put
 * back into listening/writing mode. Optionally logs the response via
 * dnstap when configured. */
2045 comm_point_send_reply(struct comm_reply *repinfo)
2047 log_assert(repinfo && repinfo->c);
2048 if(repinfo->c->type == comm_udp) {
/* srctype set: reply must leave from the same local address */
2049 if(repinfo->srctype)
2050 comm_point_send_udp_msg_if(repinfo->c,
2051 repinfo->c->buffer, (struct sockaddr*)&repinfo->addr,
2052 repinfo->addrlen, repinfo);
2054 comm_point_send_udp_msg(repinfo->c, repinfo->c->buffer,
2055 (struct sockaddr*)&repinfo->addr, repinfo->addrlen);
2057 if(repinfo->c->dtenv != NULL &&
2058 repinfo->c->dtenv->log_client_response_messages)
2059 dt_msg_send_client_response(repinfo->c->dtenv,
2060 &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
/* TCP branch: dnstap env lives on the accepting parent */
2064 if(repinfo->c->tcp_parent->dtenv != NULL &&
2065 repinfo->c->tcp_parent->dtenv->log_client_response_messages)
2066 dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv,
2067 &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
/* -1: keep the existing fd; arm the tcp timeout */
2069 comm_point_start_listening(repinfo->c, -1,
2070 repinfo->c->tcp_timeout_msec);
/* Abandon a reply without sending: a no-op for UDP (elided return),
 * and for TCP the handler is reclaimed onto the parent's free list. */
2075 comm_point_drop_reply(struct comm_reply* repinfo)
2079 log_assert(repinfo && repinfo->c);
2080 log_assert(repinfo->c->type != comm_tcp_accept);
2081 if(repinfo->c->type == comm_udp)
2083 reclaim_tcp_handler(repinfo->c);
/* Remove the comm point's event from the event base so no further
 * callbacks fire until comm_point_start_listening is called again. */
2087 comm_point_stop_listening(struct comm_point* c)
2089 verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
2090 if(ub_event_del(c->ev->ev) != 0) {
2091 log_err("event_del error to stoplisten");
/* (Re)arm the comm point's event, optionally switching to a new fd
 * (newfd, unless -1) and setting a timeout of msec milliseconds
 * (-1 = keep, 0 = none). For comm_tcp the read/write bit follows
 * tcp_is_reading. Some branches are elided in this view. */
2096 comm_point_start_listening(struct comm_point* c, int newfd, int msec)
2098 verbose(VERB_ALGO, "comm point start listening %d",
2099 c->fd==-1?newfd:c->fd);
2100 if(c->type == comm_tcp_accept && !c->tcp_free) {
2101 /* no use to start listening no free slots. */
2104 if(msec != -1 && msec != 0) {
/* lazily allocate the timeout storage on first use */
2106 c->timeout = (struct timeval*)malloc(sizeof(
2109 log_err("cpsl: malloc failed. No net read.");
2113 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
2114 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
2115 c->timeout->tv_sec = msec/1000;
2116 c->timeout->tv_usec = (msec%1000)*1000;
2117 #endif /* S_SPLINT_S */
/* tcp: select the direction the state machine currently needs */
2119 if(c->type == comm_tcp) {
2120 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
2121 if(c->tcp_is_reading)
2122 ub_event_add_bits(c->ev->ev, UB_EV_READ);
2123 else ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
2134 ub_event_set_fd(c->ev->ev, c->fd);
2136 if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
2137 log_err("event_add failed. in cpsl.");
/* Re-register the comm point's event with exactly the requested
 * read (rd) and/or write (wr) interest, keeping the current timeout. */
2141 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
2143 verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
2144 if(ub_event_del(c->ev->ev) != 0) {
2145 log_err("event_del error to cplf");
2147 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
2148 if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
2149 if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
2150 if(ub_event_add(c->ev->ev, c->timeout) != 0) {
2151 log_err("event_add failed. in cplf.");
/* Estimate the heap memory used by a comm point: the struct itself,
 * its internal event, timeout, buffer (tcp/local), and recursively
 * all preallocated TCP handlers of an accept point. */
2155 size_t comm_point_get_mem(struct comm_point* c)
2160 s = sizeof(*c) + sizeof(*c->ev);
2162 s += sizeof(*c->timeout);
2163 if(c->type == comm_tcp || c->type == comm_local)
2164 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
2165 if(c->type == comm_tcp_accept) {
2167 for(i=0; i<c->max_tcp_count; i++)
2168 s += comm_point_get_mem(c->tcp_handlers[i]);
/* Create a disabled timer on the given base that calls cb(cb_arg)
 * when it fires; returns NULL on allocation/event failure. The
 * internal_timer embeds the public comm_timer as its super struct. */
2174 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
2176 struct internal_timer *tm = (struct internal_timer*)calloc(1,
2177 sizeof(struct internal_timer));
2179 log_err("malloc failed");
/* link super -> sub so comm_timer_* can reach the internals */
2182 tm->super.ev_timer = tm;
2184 tm->super.callback = cb;
2185 tm->super.cb_arg = cb_arg;
2186 tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT,
2187 comm_timer_callback, &tm->super);
2188 if(tm->ev == NULL) {
2189 log_err("timer_create: event_base_set failed.");
/* Cancel a pending timer and mark it disabled. */
2197 comm_timer_disable(struct comm_timer* timer)
2201 ub_timer_del(timer->ev_timer->ev);
2202 timer->ev_timer->enabled = 0;
/* (Re)arm the timer to fire after the relative time tv, disabling any
 * previously pending expiry first. Marked enabled even if the add
 * fails (only an error is logged), matching the original behavior. */
2206 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
2209 if(timer->ev_timer->enabled)
2210 comm_timer_disable(timer);
2211 if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
2212 comm_timer_callback, timer, tv) != 0)
2213 log_err("comm_timer_set: evtimer_add failed.");
2214 timer->ev_timer->enabled = 1;
/* Destroy a timer: cancel it, free its event and the internal_timer
 * allocation (which contains the public comm_timer as super struct). */
2218 comm_timer_delete(struct comm_timer* timer)
2222 comm_timer_disable(timer);
2223 /* Free the sub struct timer->ev_timer derived from the super struct timer.
2224 * i.e. assert(timer == timer->ev_timer)
2226 ub_event_free(timer->ev_timer->ev);
2227 free(timer->ev_timer);
/* Event-loop callback for timers: on a timeout event, refresh the
 * base's cached time, mark the timer disabled (it is one-shot) and
 * invoke the user callback. */
2231 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
2233 struct comm_timer* tm = (struct comm_timer*)arg;
2234 if(!(event&UB_EV_TIMEOUT))
2236 ub_comm_base_now(tm->ev_timer->base);
2237 tm->ev_timer->enabled = 0;
2238 fptr_ok(fptr_whitelist_comm_timer(tm->callback));
2239 (*tm->callback)(tm->cb_arg);
/* Return nonzero if the timer is currently armed. */
2243 comm_timer_is_set(struct comm_timer* timer)
2245 return (int)timer->ev_timer->enabled;
/* Memory accounting for a timer: just the internal_timer struct. */
2249 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer))
2251 return sizeof(struct internal_timer);
/* Create a signal-handling structure bound to `base` with no signals
 * attached yet (bind them with comm_signal_bind); NULL on malloc fail. */
2255 comm_signal_create(struct comm_base* base,
2256 void (*callback)(int, void*), void* cb_arg)
2258 struct comm_signal* com = (struct comm_signal*)malloc(
2259 sizeof(struct comm_signal));
2261 log_err("malloc failed");
2265 com->callback = callback;
2266 com->cb_arg = cb_arg;
/* empty list of bound signal events */
2267 com->ev_signal = NULL;
/* Event-loop callback for signals: forwards the signal number to the
 * user callback after refreshing the base's cached time. */
2272 comm_signal_callback(int sig, short event, void* arg)
2274 struct comm_signal* comsig = (struct comm_signal*)arg;
2275 if(!(event & UB_EV_SIGNAL))
2277 ub_comm_base_now(comsig->base);
2278 fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
2279 (*comsig->callback)(sig, comsig->cb_arg);
/* Bind signal `sig` to the comsig handler: allocate a list entry,
 * create and add a persistent signal event, and push the entry onto
 * comsig->ev_signal. Returns failure (elided value) on any error. */
2283 comm_signal_bind(struct comm_signal* comsig, int sig)
2285 struct internal_signal* entry = (struct internal_signal*)calloc(1,
2286 sizeof(struct internal_signal));
2288 log_err("malloc failed");
2292 /* add signal event */
2293 entry->ev = ub_signal_new(comsig->base->eb->base, sig,
2294 comm_signal_callback, comsig);
2295 if(entry->ev == NULL) {
2296 log_err("Could not create signal event");
2300 if(ub_signal_add(entry->ev, NULL) != 0) {
2301 log_err("Could not add signal handler");
2302 ub_event_free(entry->ev);
2306 /* link into list */
2307 entry->next = comsig->ev_signal;
2308 comsig->ev_signal = entry;
2313 comm_signal_delete(struct comm_signal* comsig)
2315 struct internal_signal* p, *np;
2318 p=comsig->ev_signal;
2321 ub_signal_del(p->ev);
2322 ub_event_free(p->ev);