2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
6 * This software is open source.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 * This file has functions to get queries from clients.
42 #ifdef HAVE_SYS_TYPES_H
43 # include <sys/types.h>
46 #ifdef USE_TCP_FASTOPEN
47 #include <netinet/tcp.h>
49 #include "services/listen_dnsport.h"
50 #include "services/outside_network.h"
51 #include "util/netevent.h"
53 #include "util/config_file.h"
54 #include "util/net_help.h"
55 #include "sldns/sbuffer.h"
56 #include "services/mesh.h"
57 #include "util/fptr_wlist.h"
58 #include "util/locks.h"
70 #include <systemd/sd-daemon.h>
73 /** number of queued TCP connections for listen() */
74 #define TCP_BACKLOG 256
76 /** number of simultaneous requests a client can have */
77 #define TCP_MAX_REQ_SIMULTANEOUS 32
79 #ifndef THREADS_DISABLED
80 /** lock on the counter of stream buffer memory */
81 static lock_basic_type stream_wait_count_lock;
83 /** size (in bytes) of stream wait buffers */
84 static size_t stream_wait_count = 0;
85 /** is the lock initialised for stream wait buffers */
86 static int stream_wait_lock_inited = 0;
89 * Debug print of the getaddrinfo returned address.
90 * @param addr: the address returned.
93 verbose_print_addr(struct addrinfo *addr)
95 if(verbosity >= VERB_ALGO) {
97 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
99 if(addr->ai_family == AF_INET6)
100 sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
103 if(inet_ntop(addr->ai_family, sinaddr, buf,
104 (socklen_t)sizeof(buf)) == 0) {
105 (void)strlcpy(buf, "(null)", sizeof(buf));
107 buf[sizeof(buf)-1] = 0;
108 verbose(VERB_ALGO, "creating %s%s socket %s %d",
109 addr->ai_socktype==SOCK_DGRAM?"udp":
110 addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
111 addr->ai_family==AF_INET?"4":
112 addr->ai_family==AF_INET6?"6":
114 ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
120 systemd_get_activated(int family, int socktype, int listen,
121 struct sockaddr *addr, socklen_t addrlen,
127 const char* listen_pid, *listen_fds;
129 /* We should use "listen" option only for stream protocols. For UDP it should be -1 */
131 if((r = sd_booted()) < 1) {
133 log_warn("systemd is not running");
135 log_err("systemd sd_booted(): %s", strerror(-r));
139 listen_pid = getenv("LISTEN_PID");
140 listen_fds = getenv("LISTEN_FDS");
143 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID");
148 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS");
152 if((r = sd_listen_fds(0)) < 1) {
154 log_warn("systemd: did not return socket, check unit configuration");
156 log_err("systemd sd_listen_fds(): %s", strerror(-r));
160 for(i = 0; i < r; i++) {
161 if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) {
162 s = SD_LISTEN_FDS_START + i;
168 log_err_addr("systemd sd_listen_fds()",
170 (struct sockaddr_storage *)addr, addrlen);
172 log_err("systemd sd_listen_fds(): %s", path);
179 create_udp_sock(int family, int socktype, struct sockaddr* addr,
180 socklen_t addrlen, int v6only, int* inuse, int* noproto,
181 int rcv, int snd, int listen, int* reuseport, int transparent,
182 int freebind, int use_systemd, int dscp)
186 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY)
190 int mtu = IPV6_MIN_MTU;
192 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
195 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
201 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
204 #if !defined(IP_FREEBIND)
208 int got_fd_from_systemd = 0;
212 && (s = systemd_get_activated(family, socktype, -1, addr,
213 addrlen, NULL)) == -1)) {
217 if((s = socket(family, socktype, 0)) == -1) {
220 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
224 log_err("can't create socket: %s", strerror(errno));
226 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
227 WSAGetLastError() == WSAEPROTONOSUPPORT) {
231 log_err("can't create socket: %s",
232 wsa_strerror(WSAGetLastError()));
239 got_fd_from_systemd = 1;
244 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
245 (socklen_t)sizeof(on)) < 0) {
247 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
249 if(errno != ENOSYS) {
256 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
257 wsa_strerror(WSAGetLastError()));
264 #endif /* SO_REUSEADDR */
266 # ifdef SO_REUSEPORT_LB
267 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance
268 * like SO_REUSEPORT on Linux. This is what the users want
269 * with the config option in unbound.conf; if we actually
270 * need local address and port reuse they'll also need to
271 * have SO_REUSEPORT set for them, assume it was _LB they want.
273 if (reuseport && *reuseport &&
274 setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on,
275 (socklen_t)sizeof(on)) < 0) {
277 if(errno != ENOPROTOOPT || verbosity >= 3)
278 log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s",
281 /* this option is not essential, we can continue */
284 # else /* no SO_REUSEPORT_LB */
286 /* try to set SO_REUSEPORT so that incoming
287 * queries are distributed evenly among the receiving threads.
288 * Each thread must have its own socket bound to the same port,
289 * with SO_REUSEPORT set on each socket.
291 if (reuseport && *reuseport &&
292 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
293 (socklen_t)sizeof(on)) < 0) {
295 if(errno != ENOPROTOOPT || verbosity >= 3)
296 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
299 /* this option is not essential, we can continue */
302 # endif /* SO_REUSEPORT_LB */
305 #endif /* defined(SO_REUSEPORT) */
306 #ifdef IP_TRANSPARENT
308 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
309 (socklen_t)sizeof(on)) < 0) {
310 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
313 #elif defined(IP_BINDANY)
315 setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
316 (family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
317 (void*)&on, (socklen_t)sizeof(on)) < 0) {
318 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
319 (family==AF_INET6?"V6":""), strerror(errno));
321 #elif defined(SO_BINDANY)
323 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on,
324 (socklen_t)sizeof(on)) < 0) {
325 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
328 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
332 setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
333 (socklen_t)sizeof(on)) < 0) {
334 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
337 #endif /* IP_FREEBIND */
341 socklen_t slen = (socklen_t)sizeof(got);
342 # ifdef SO_RCVBUFFORCE
343 /* Linux specific: try to use root permission to override
344 * system limits on rcvbuf. The limit is stored in
345 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
346 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
347 (socklen_t)sizeof(rcv)) < 0) {
350 log_err("setsockopt(..., SO_RCVBUFFORCE, "
351 "...) failed: %s", strerror(errno));
354 log_err("setsockopt(..., SO_RCVBUFFORCE, "
356 wsa_strerror(WSAGetLastError()));
363 # endif /* SO_RCVBUFFORCE */
364 if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
365 (socklen_t)sizeof(rcv)) < 0) {
367 log_err("setsockopt(..., SO_RCVBUF, "
368 "...) failed: %s", strerror(errno));
371 log_err("setsockopt(..., SO_RCVBUF, "
373 wsa_strerror(WSAGetLastError()));
380 /* check if we got the right thing or if system
381 * reduced to some system max. Warn if so */
382 if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
383 &slen) >= 0 && got < rcv/2) {
384 log_warn("so-rcvbuf %u was not granted. "
385 "Got %u. To fix: start with "
386 "root permissions(linux) or sysctl "
387 "bigger net.core.rmem_max(linux) or "
388 "kern.ipc.maxsockbuf(bsd) values.",
389 (unsigned)rcv, (unsigned)got);
391 # ifdef SO_RCVBUFFORCE
394 #endif /* SO_RCVBUF */
396 /* first do RCVBUF as the receive buffer is more important */
400 socklen_t slen = (socklen_t)sizeof(got);
401 # ifdef SO_SNDBUFFORCE
402 /* Linux specific: try to use root permission to override
403 * system limits on sndbuf. The limit is stored in
404 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
405 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
406 (socklen_t)sizeof(snd)) < 0) {
409 log_err("setsockopt(..., SO_SNDBUFFORCE, "
410 "...) failed: %s", strerror(errno));
413 log_err("setsockopt(..., SO_SNDBUFFORCE, "
415 wsa_strerror(WSAGetLastError()));
422 # endif /* SO_SNDBUFFORCE */
423 if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
424 (socklen_t)sizeof(snd)) < 0) {
426 log_err("setsockopt(..., SO_SNDBUF, "
427 "...) failed: %s", strerror(errno));
430 log_err("setsockopt(..., SO_SNDBUF, "
432 wsa_strerror(WSAGetLastError()));
439 /* check if we got the right thing or if system
440 * reduced to some system max. Warn if so */
441 if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
442 &slen) >= 0 && got < snd/2) {
443 log_warn("so-sndbuf %u was not granted. "
444 "Got %u. To fix: start with "
445 "root permissions(linux) or sysctl "
446 "bigger net.core.wmem_max(linux) or "
447 "kern.ipc.maxsockbuf(bsd) values.",
448 (unsigned)snd, (unsigned)got);
450 # ifdef SO_SNDBUFFORCE
453 #endif /* SO_SNDBUF */
455 err = set_ip_dscp(s, family, dscp);
457 log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err);
458 if(family == AF_INET6) {
459 # if defined(IPV6_V6ONLY)
461 int val=(v6only==2)?0:1;
462 if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
463 (void*)&val, (socklen_t)sizeof(val)) < 0) {
465 log_err("setsockopt(..., IPV6_V6ONLY"
466 ", ...) failed: %s", strerror(errno));
469 log_err("setsockopt(..., IPV6_V6ONLY"
471 wsa_strerror(WSAGetLastError()));
480 # if defined(IPV6_USE_MIN_MTU)
482 * There is no fragmentation of IPv6 datagrams
483 * during forwarding in the network. Therefore
484 * we do not send UDP datagrams larger than
485 * the minimum IPv6 MTU of 1280 octets. The
486 * EDNS0 message length can be larger if the
487 * network stack supports IPV6_USE_MIN_MTU.
489 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
490 (void*)&on, (socklen_t)sizeof(on)) < 0) {
492 log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
493 "...) failed: %s", strerror(errno));
496 log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
498 wsa_strerror(WSAGetLastError()));
505 # elif defined(IPV6_MTU)
507 * On Linux, to send no larger than 1280, the PMTUD is
508 * disabled by default for datagrams anyway, so we set
511 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
512 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
514 log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
518 log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
519 wsa_strerror(WSAGetLastError()));
526 # endif /* IPv6 MTU */
527 } else if(family == AF_INET) {
528 # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
529 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
530 * PMTU information is not accepted, but fragmentation is allowed
531 * if and only if the packet size exceeds the outgoing interface MTU
532 * (and also uses the interface mtu to determine the size of the packets).
533 * So there won't be any EMSGSIZE error. Against DNS fragmentation attacks.
534 * FreeBSD already has same semantics without setting the option. */
537 # if defined(IP_PMTUDISC_OMIT)
538 action = IP_PMTUDISC_OMIT;
539 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
540 &action, (socklen_t)sizeof(action)) < 0) {
542 if (errno != EINVAL) {
543 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
562 action = IP_PMTUDISC_DONT;
563 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
564 &action, (socklen_t)sizeof(action)) < 0) {
565 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
577 # elif defined(IP_DONTFRAG)
579 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
580 &off, (socklen_t)sizeof(off)) < 0) {
581 log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
592 # endif /* IPv4 MTU */
596 !got_fd_from_systemd &&
598 bind(s, (struct sockaddr*)addr, addrlen) != 0) {
603 *inuse = (errno == EADDRINUSE);
604 /* detect freebsd jail with no ipv6 permission */
605 if(family==AF_INET6 && errno==EINVAL)
607 else if(errno != EADDRINUSE &&
608 !(errno == EACCES && verbosity < 4 && !listen)
610 && !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen)
613 log_err_addr("can't bind socket", strerror(errno),
614 (struct sockaddr_storage*)addr, addrlen);
616 #endif /* EADDRINUSE */
618 #else /* USE_WINSOCK */
619 if(WSAGetLastError() != WSAEADDRINUSE &&
620 WSAGetLastError() != WSAEADDRNOTAVAIL &&
621 !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) {
622 log_err_addr("can't bind socket",
623 wsa_strerror(WSAGetLastError()),
624 (struct sockaddr_storage*)addr, addrlen);
627 #endif /* USE_WINSOCK */
630 if(!fd_set_nonblock(s)) {
644 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
645 int* reuseport, int transparent, int mss, int freebind, int use_systemd, int dscp)
649 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY)
653 int got_fd_from_systemd = 0;
655 #ifdef USE_TCP_FASTOPEN
658 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
661 #if !defined(IP_FREEBIND)
664 verbose_print_addr(addr);
669 && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
670 addr->ai_addr, addr->ai_addrlen,
675 if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
677 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
681 log_err("can't create socket: %s", strerror(errno));
683 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
684 WSAGetLastError() == WSAEPROTONOSUPPORT) {
688 log_err("can't create socket: %s",
689 wsa_strerror(WSAGetLastError()));
694 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
695 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
696 (socklen_t)sizeof(mss)) < 0) {
698 log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
701 log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
702 wsa_strerror(WSAGetLastError()));
706 " tcp socket mss set to %d", mss);
709 log_warn(" setsockopt(TCP_MAXSEG) unsupported");
710 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
714 got_fd_from_systemd = 1;
718 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
719 (socklen_t)sizeof(on)) < 0) {
721 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
725 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
726 wsa_strerror(WSAGetLastError()));
731 #endif /* SO_REUSEADDR */
733 if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
734 (socklen_t)sizeof(on)) < 0) {
735 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
738 #endif /* IP_FREEBIND */
740 /* try to set SO_REUSEPORT so that incoming
741 * connections are distributed evenly among the receiving threads.
742 * Each thread must have its own socket bound to the same port,
743 * with SO_REUSEPORT set on each socket.
745 if (reuseport && *reuseport &&
746 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
747 (socklen_t)sizeof(on)) < 0) {
749 if(errno != ENOPROTOOPT || verbosity >= 3)
750 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
753 /* this option is not essential, we can continue */
758 #endif /* defined(SO_REUSEPORT) */
759 #if defined(IPV6_V6ONLY)
760 if(addr->ai_family == AF_INET6 && v6only) {
761 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
762 (void*)&on, (socklen_t)sizeof(on)) < 0) {
764 log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
768 log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
769 wsa_strerror(WSAGetLastError()));
777 #endif /* IPV6_V6ONLY */
778 #ifdef IP_TRANSPARENT
780 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
781 (socklen_t)sizeof(on)) < 0) {
782 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
785 #elif defined(IP_BINDANY)
787 setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
788 (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
789 (void*)&on, (socklen_t)sizeof(on)) < 0) {
790 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
791 (addr->ai_family==AF_INET6?"V6":""), strerror(errno));
793 #elif defined(SO_BINDANY)
795 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
797 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
800 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
801 err = set_ip_dscp(s, addr->ai_family, dscp);
803 log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err);
806 !got_fd_from_systemd &&
808 bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
810 /* detect freebsd jail with no ipv6 permission */
811 if(addr->ai_family==AF_INET6 && errno==EINVAL)
814 log_err_addr("can't bind socket", strerror(errno),
815 (struct sockaddr_storage*)addr->ai_addr,
820 log_err_addr("can't bind socket",
821 wsa_strerror(WSAGetLastError()),
822 (struct sockaddr_storage*)addr->ai_addr,
828 if(!fd_set_nonblock(s)) {
836 if(listen(s, TCP_BACKLOG) == -1) {
838 log_err("can't listen: %s", strerror(errno));
841 log_err("can't listen: %s", wsa_strerror(WSAGetLastError()));
846 #ifdef USE_TCP_FASTOPEN
847 /* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
848 against IP spoofing attacks as suggested in RFC7413 */
850 /* OS X implementation only supports qlen of 1 via this call. Actual
851 value is configured by the net.inet.tcp.fastopen_backlog kernel parm. */
854 /* 5 is recommended on linux */
857 if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen,
858 sizeof(qlen))) == -1 ) {
860 /* squelch ENOPROTOOPT: freebsd server mode with kernel support
861 disabled, except when verbosity enabled for debugging */
862 if(errno != ENOPROTOOPT || verbosity >= 3) {
865 log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
867 log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
878 set_ip_dscp(int socket, int addrfamily, int dscp)
887 if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds, sizeof(ds)) < 0)
888 return sock_strerror(errno);
891 if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0)
892 return sock_strerror(errno);
900 sock_strerror(int errn)
902 return strerror(errn);
906 sock_close(int socket)
913 sock_strerror(int ATTR_UNUSED(errn))
915 return wsa_strerror(WSAGetLastError());
919 sock_close(int socket)
924 # endif /* USE_WINSOCK */
927 create_local_accept_sock(const char *path, int* noproto, int use_systemd)
932 if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1, NULL, 0, path)) != -1)
938 struct sockaddr_un usock;
943 verbose(VERB_ALGO, "creating unix socket %s", path);
944 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
945 /* this member exists on BSDs, not Linux */
946 usock.sun_len = (unsigned)sizeof(usock);
948 usock.sun_family = AF_LOCAL;
949 /* length is 92-108, 104 on FreeBSD */
950 (void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));
952 if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
953 log_err("Cannot create local socket %s (%s)",
954 path, strerror(errno));
958 if (unlink(path) && errno != ENOENT) {
959 /* The socket already exists and cannot be removed */
960 log_err("Cannot remove old local socket %s (%s)",
961 path, strerror(errno));
965 if (bind(s, (struct sockaddr *)&usock,
966 (socklen_t)sizeof(struct sockaddr_un)) == -1) {
967 log_err("Cannot bind local socket %s (%s)",
968 path, strerror(errno));
972 if (!fd_set_nonblock(s)) {
973 log_err("Cannot set non-blocking mode");
977 if (listen(s, TCP_BACKLOG) == -1) {
978 log_err("can't listen: %s", strerror(errno));
982 (void)noproto; /*unused*/
999 log_err("Local sockets are not supported");
1007 * Create socket from getaddrinfo results
1010 make_sock(int stype, const char* ifname, const char* port,
1011 struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
1012 int* reuseport, int transparent, int tcp_mss, int freebind, int use_systemd, int dscp)
1014 struct addrinfo *res = NULL;
1015 int r, s, inuse, noproto;
1016 hints->ai_socktype = stype;
1018 if((r=getaddrinfo(ifname, port, hints, &res)) != 0 || !res) {
1020 if(r == EAI_NONAME && hints->ai_family == AF_INET6){
1021 *noip6 = 1; /* 'Host not found' for IP6 on winXP */
1025 log_err("node %s:%s getaddrinfo: %s %s",
1026 ifname?ifname:"default", port, gai_strerror(r),
1028 r==EAI_SYSTEM?(char*)strerror(errno):""
1035 if(stype == SOCK_DGRAM) {
1036 verbose_print_addr(res);
1037 s = create_udp_sock(res->ai_family, res->ai_socktype,
1038 (struct sockaddr*)res->ai_addr, res->ai_addrlen,
1039 v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
1040 reuseport, transparent, freebind, use_systemd, dscp);
1041 if(s == -1 && inuse) {
1042 log_err("bind: address already in use");
1043 } else if(s == -1 && noproto && hints->ai_family == AF_INET6){
1047 s = create_tcp_accept_sock(res, v6only, &noproto, reuseport,
1048 transparent, tcp_mss, freebind, use_systemd, dscp);
1049 if(s == -1 && noproto && hints->ai_family == AF_INET6){
1057 /** make socket and first see if ifname contains port override info */
1059 make_sock_port(int stype, const char* ifname, const char* port,
1060 struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
1061 int* reuseport, int transparent, int tcp_mss, int freebind, int use_systemd, int dscp)
1063 char* s = strchr(ifname, '@');
1065 /* override port with ifspec@port */
1068 if((size_t)(s-ifname) >= sizeof(newif)) {
1069 log_err("ifname too long: %s", ifname);
1073 if(strlen(s+1) >= sizeof(p)) {
1074 log_err("portnumber too long: %s", ifname);
1078 (void)strlcpy(newif, ifname, sizeof(newif));
1079 newif[s-ifname] = 0;
1080 (void)strlcpy(p, s+1, sizeof(p));
1082 return make_sock(stype, newif, p, hints, v6only, noip6,
1083 rcv, snd, reuseport, transparent, tcp_mss, freebind, use_systemd, dscp);
1085 return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd,
1086 reuseport, transparent, tcp_mss, freebind, use_systemd, dscp);
1090 * Add port to open ports list.
1091 * @param list: list head. changed.
1093 * @param ftype: type of the listening socket (an enum listen_type value).
1094 * @return false on failure. list is unchanged then.
1097 port_insert(struct listen_port** list, int s, enum listen_type ftype)
1099 struct listen_port* item = (struct listen_port*)malloc(
1100 sizeof(struct listen_port));
1105 item->ftype = ftype;
1110 /** set fd to receive source address packet info */
1112 set_recvpktinfo(int s, int family)
1114 #if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
1119 if(family == AF_INET6) {
1120 # ifdef IPV6_RECVPKTINFO
1121 if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
1122 (void*)&on, (socklen_t)sizeof(on)) < 0) {
1123 log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
1127 # elif defined(IPV6_PKTINFO)
1128 if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
1129 (void*)&on, (socklen_t)sizeof(on)) < 0) {
1130 log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
1135 log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
1136 "disable interface-automatic or do-ip6 in config");
1138 # endif /* defined IPV6_RECVPKTINFO */
1140 } else if(family == AF_INET) {
1142 if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
1143 (void*)&on, (socklen_t)sizeof(on)) < 0) {
1144 log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
1148 # elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
1149 if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
1150 (void*)&on, (socklen_t)sizeof(on)) < 0) {
1151 log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
1156 log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
1157 "interface-automatic or do-ip4 in config");
1159 # endif /* IP_PKTINFO */
1165 /** see if interface is ssl: its port number equals the ssl port or one of the additional tls ports */
1167 if_is_ssl(const char* ifname, const char* port, int ssl_port,
1168 struct config_strlist* tls_additional_port)
1170 struct config_strlist* s;
1171 char* p = strchr(ifname, '@');
1172 if(!p && atoi(port) == ssl_port)
1174 if(p && atoi(p+1) == ssl_port)
1176 for(s = tls_additional_port; s; s = s->next) {
1177 if(p && atoi(p+1) == atoi(s->str))
1179 if(!p && atoi(port) == atoi(s->str))
1186 * Helper for listening_ports_open. Creates one interface (or NULL for default).
1187 * @param ifname: The interface ip address.
1188 * @param do_auto: use automatic interface detection.
1189 * If enabled, then ifname must be the wildcard name.
1190 * @param do_udp: if udp should be used.
1191 * @param do_tcp: if tcp should be used.
1192 * @param hints: for getaddrinfo. family and flags have to be set by caller.
1193 * @param port: Port number to use (as string).
1194 * @param list: list of open ports, appended to, changed to point to list head.
1195 * @param rcv: receive buffer size for UDP
1196 * @param snd: send buffer size for UDP
1197 * @param ssl_port: ssl service port number
1198 * @param tls_additional_port: list of additional ssl service port numbers.
1199 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
1200 * set to false on exit if reuseport failed due to no kernel support.
1201 * @param transparent: set IP_TRANSPARENT socket option.
1202 * @param tcp_mss: maximum segment size of tcp socket. default if zero.
1203 * @param freebind: set IP_FREEBIND socket option.
1204 * @param use_systemd: if true, fetch sockets from systemd.
1205 * @param dnscrypt_port: dnscrypt service port number
1206 * @param dscp: DSCP to use.
1207 * @return: returns false on error.
1210 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
1211 struct addrinfo *hints, const char* port, struct listen_port** list,
1212 size_t rcv, size_t snd, int ssl_port,
1213 struct config_strlist* tls_additional_port, int* reuseport,
1214 int transparent, int tcp_mss, int freebind, int use_systemd,
1215 int dnscrypt_port, int dscp)
1219 int is_dnscrypt = ((strchr(ifname, '@') &&
1220 atoi(strchr(ifname, '@')+1) == dnscrypt_port) ||
1221 (!strchr(ifname, '@') && atoi(port) == dnscrypt_port));
1223 int is_dnscrypt = 0;
1224 (void)dnscrypt_port;
1227 if(!do_udp && !do_tcp)
1230 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1231 &noip6, rcv, snd, reuseport, transparent,
1232 tcp_mss, freebind, use_systemd, dscp)) == -1) {
1234 log_warn("IPv6 protocol not available");
1239 /* getting source addr packet info is highly non-portable */
1240 if(!set_recvpktinfo(s, hints->ai_family)) {
1248 if(!port_insert(list, s,
1249 is_dnscrypt?listen_type_udpancil_dnscrypt:listen_type_udpancil)) {
1258 /* regular udp socket */
1259 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1260 &noip6, rcv, snd, reuseport, transparent,
1261 tcp_mss, freebind, use_systemd, dscp)) == -1) {
1263 log_warn("IPv6 protocol not available");
1268 if(!port_insert(list, s,
1269 is_dnscrypt?listen_type_udp_dnscrypt:listen_type_udp)) {
1279 int is_ssl = if_is_ssl(ifname, port, ssl_port,
1280 tls_additional_port);
1281 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
1282 &noip6, 0, 0, reuseport, transparent, tcp_mss,
1283 freebind, use_systemd, dscp)) == -1) {
1285 /*log_warn("IPv6 protocol not available");*/
1291 verbose(VERB_ALGO, "setup TCP for SSL service");
1292 if(!port_insert(list, s, is_ssl?listen_type_ssl:
1293 (is_dnscrypt?listen_type_tcp_dnscrypt:listen_type_tcp))) {
1306 * Add items to commpoint list in front.
1307 * @param c: commpoint to add.
1308 * @param front: listen struct.
1309 * @return: false on failure.
1312 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
1314 struct listen_list* item = (struct listen_list*)malloc(
1315 sizeof(struct listen_list));
1319 item->next = front->cps;
1324 struct listen_dnsport*
1325 listen_create(struct comm_base* base, struct listen_port* ports,
1326 size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
1327 struct tcl_list* tcp_conn_limit, void* sslctx,
1328 struct dt_env* dtenv, comm_point_callback_type* cb, void *cb_arg)
1330 struct listen_dnsport* front = (struct listen_dnsport*)
1331 malloc(sizeof(struct listen_dnsport));
1335 front->udp_buff = sldns_buffer_new(bufsize);
1337 front->dnscrypt_udp_buff = NULL;
1339 if(!front->udp_buff) {
1343 if(!stream_wait_lock_inited) {
1344 lock_basic_init(&stream_wait_count_lock);
1345 stream_wait_lock_inited = 1;
1348 /* create comm points as needed */
1350 struct comm_point* cp = NULL;
1351 if(ports->ftype == listen_type_udp ||
1352 ports->ftype == listen_type_udp_dnscrypt)
1353 cp = comm_point_create_udp(base, ports->fd,
1354 front->udp_buff, cb, cb_arg);
1355 else if(ports->ftype == listen_type_tcp ||
1356 ports->ftype == listen_type_tcp_dnscrypt)
1357 cp = comm_point_create_tcp(base, ports->fd,
1358 tcp_accept_count, tcp_idle_timeout,
1359 tcp_conn_limit, bufsize, front->udp_buff,
1361 else if(ports->ftype == listen_type_ssl) {
1362 cp = comm_point_create_tcp(base, ports->fd,
1363 tcp_accept_count, tcp_idle_timeout,
1364 tcp_conn_limit, bufsize, front->udp_buff,
1367 } else if(ports->ftype == listen_type_udpancil ||
1368 ports->ftype == listen_type_udpancil_dnscrypt)
1369 cp = comm_point_create_udp_ancil(base, ports->fd,
1370 front->udp_buff, cb, cb_arg);
1372 log_err("can't create commpoint");
1373 listen_delete(front);
1377 cp->do_not_close = 1;
1379 if (ports->ftype == listen_type_udp_dnscrypt ||
1380 ports->ftype == listen_type_tcp_dnscrypt ||
1381 ports->ftype == listen_type_udpancil_dnscrypt) {
1383 cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
1384 if(!cp->dnscrypt_buffer) {
1385 log_err("can't alloc dnscrypt_buffer");
1386 comm_point_delete(cp);
1387 listen_delete(front);
1390 front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
1393 if(!listen_cp_insert(cp, front)) {
1394 log_err("malloc failed");
1395 comm_point_delete(cp);
1396 listen_delete(front);
1399 ports = ports->next;
1402 log_err("Could not open sockets to accept queries.");
1403 listen_delete(front);
1411 listen_list_delete(struct listen_list* list)
1413 struct listen_list *p = list, *pn;
1416 comm_point_delete(p->com);
1423 listen_delete(struct listen_dnsport* front)
1427 listen_list_delete(front->cps);
1429 if(front->dnscrypt_udp_buff &&
1430 front->udp_buff != front->dnscrypt_udp_buff) {
1431 sldns_buffer_free(front->dnscrypt_udp_buff);
1434 sldns_buffer_free(front->udp_buff);
1436 if(stream_wait_lock_inited) {
1437 stream_wait_lock_inited = 0;
1438 lock_basic_destroy(&stream_wait_count_lock);
/**
 * Create the list of ports to listen on, from the configuration.
 * The port number cfg->port is printed into portbuf and handed, together
 * with getaddrinfo style hints, to ports_create_if() for every address;
 * the ports that are created are collected in list.
 * With automatic interface mode, or without configured interfaces, one
 * IPv6 and one IPv4 address are used: the wildcard addresses in
 * automatic mode and the loopback addresses otherwise.
 * Otherwise every entry of cfg->ifs is opened, with the address family
 * picked by str_is_ip6().
 * When ports_create_if() fails, listening_ports_free() is called on the
 * list that was built so far.
 * @param cfg: configuration with port, interfaces and socket options.
 * @param reuseport: passed on unchanged to ports_create_if().
 * NOTE(review): presumably returns the list, or NULL on failure, and
 * presumably each address family is only opened when do_ip6 / do_ip4
 * is set - confirm against the complete definition.
 */
1443 listening_ports_open(struct config_file* cfg, int* reuseport)
1445 struct listen_port* list = NULL;
1446 struct addrinfo hints;
1447 int i, do_ip4, do_ip6;
1448 int do_tcp, do_auto;
/* the port number is passed to ports_create_if() in string form */
1450 snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1451 do_ip4 = cfg->do_ip4;
1452 do_ip6 = cfg->do_ip6;
1453 do_tcp = cfg->do_tcp;
/* automatic interface mode is only used when udp is enabled */
1454 do_auto = cfg->if_automatic && cfg->do_udp;
/* NOTE(review): presumably tcp is turned off here when no incoming tcp
 * handlers are configured - confirm */
1455 if(cfg->incoming_num_tcp == 0)
1459 memset(&hints, 0, sizeof(hints));
/* AI_PASSIVE asks getaddrinfo for addresses that are suitable to bind */
1460 hints.ai_flags = AI_PASSIVE;
1461 /* no name lookups on our listening ports */
1462 if(cfg->num_ifs > 0)
1463 hints.ai_flags |= AI_NUMERICHOST;
1464 hints.ai_family = AF_UNSPEC;
/* neither ip4 nor ip6 is enabled in the configuration */
1468 if(!do_ip4 && !do_ip6) {
1471 /* create ip4 and ip6 ports so that return addresses are nice. */
1472 if(do_auto || cfg->num_ifs == 0) {
/* ip6: the wildcard address in automatic mode, otherwise loopback */
1474 hints.ai_family = AF_INET6;
1475 if(!ports_create_if(do_auto?"::0":"::1",
1476 do_auto, cfg->do_udp, do_tcp,
1477 &hints, portbuf, &list,
1478 cfg->so_rcvbuf, cfg->so_sndbuf,
1479 cfg->ssl_port, cfg->tls_additional_port,
1480 reuseport, cfg->ip_transparent,
1481 cfg->tcp_mss, cfg->ip_freebind, cfg->use_systemd,
1482 cfg->dnscrypt_port, cfg->ip_dscp)) {
1483 listening_ports_free(list);
/* ip4: the wildcard address in automatic mode, otherwise loopback */
1488 hints.ai_family = AF_INET;
1489 if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1490 do_auto, cfg->do_udp, do_tcp,
1491 &hints, portbuf, &list,
1492 cfg->so_rcvbuf, cfg->so_sndbuf,
1493 cfg->ssl_port, cfg->tls_additional_port,
1494 reuseport, cfg->ip_transparent,
1495 cfg->tcp_mss, cfg->ip_freebind, cfg->use_systemd,
1496 cfg->dnscrypt_port, cfg->ip_dscp)) {
1497 listening_ports_free(list);
/* explicitly configured interfaces are opened one by one, never in
 * automatic mode (the second argument is 0) */
1501 } else for(i = 0; i<cfg->num_ifs; i++) {
/* the address family follows from the text form of the address */
1502 if(str_is_ip6(cfg->ifs[i])) {
1505 hints.ai_family = AF_INET6;
1506 if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
1507 do_tcp, &hints, portbuf, &list,
1508 cfg->so_rcvbuf, cfg->so_sndbuf,
1509 cfg->ssl_port, cfg->tls_additional_port,
1510 reuseport, cfg->ip_transparent,
1511 cfg->tcp_mss, cfg->ip_freebind, cfg->use_systemd,
1512 cfg->dnscrypt_port, cfg->ip_dscp)) {
1513 listening_ports_free(list);
1519 hints.ai_family = AF_INET;
1520 if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp,
1521 do_tcp, &hints, portbuf, &list,
1522 cfg->so_rcvbuf, cfg->so_sndbuf,
1523 cfg->ssl_port, cfg->tls_additional_port,
1524 reuseport, cfg->ip_transparent,
1525 cfg->tcp_mss, cfg->ip_freebind, cfg->use_systemd,
1526 cfg->dnscrypt_port, cfg->ip_dscp)) {
1527 listening_ports_free(list);
/**
 * Close the sockets of a list of listening ports.
 * An entry whose fd is -1 has no socket and is not passed to
 * closesocket().
 * NOTE(review): nx is presumably used to walk to the next entry while
 * the entries are released - confirm against the complete definition.
 * @param list: first entry of the list of ports.
 */
1535 void listening_ports_free(struct listen_port* list)
1537 struct listen_port* nx;
1540 if(list->fd != -1) {
1544 closesocket(list->fd);
/**
 * Count the memory in use by the listening front, in bytes.
 * Adds up the structure itself, its base, the udp buffer (structure and
 * capacity), the dnscrypt buffer and the memory reported for every comm
 * point in the list.
 * @param listen: the listening front that is measured.
 * NOTE(review): presumably the total s is returned - confirm.
 */
1552 size_t listen_get_mem(struct listen_dnsport* listen)
1554 struct listen_list* p;
1555 size_t s = sizeof(*listen) + sizeof(*listen->base) +
1556 sizeof(*listen->udp_buff) +
1557 sldns_buffer_capacity(listen->udp_buff);
1559 s += sizeof(*listen->dnscrypt_udp_buff);
/* the dnscrypt buffer can be the same buffer as udp_buff; its capacity
 * is counted only when it is a distinct buffer, so that the shared
 * storage is not counted twice */
1560 if(listen->udp_buff != listen->dnscrypt_udp_buff){
1561 s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
/* add the memory of every comm point in the list */
1564 for(p = listen->cps; p; p = p->next) {
1566 s += comm_point_get_mem(p->com);
/**
 * Stop listening on the tcp accept comm points of the front.
 * Only comm points of type comm_tcp_accept that have a tcp_free list are
 * stopped; other comm points in the list are left alone.
 * @param listen: the listening front.
 */
1571 void listen_stop_accept(struct listen_dnsport* listen)
1573 /* do not stop the ones that have no tcp_free list
1574 * (they have already stopped listening) */
1575 struct listen_list* p;
1576 for(p=listen->cps; p; p=p->next) {
1577 if(p->com->type == comm_tcp_accept &&
1578 p->com->tcp_free != NULL) {
1579 comm_point_stop_listening(p->com);
/**
 * Start listening again on the tcp accept comm points of the front.
 * Mirrors listen_stop_accept(): only comm points of type comm_tcp_accept
 * that have a tcp_free list are started.
 * @param listen: the listening front.
 */
1584 void listen_start_accept(struct listen_dnsport* listen)
1586 /* do not start the ones that have no tcp_free list, it is no
1587 * use to listen to them because they have no free tcp handlers */
1588 struct listen_list* p;
1589 for(p=listen->cps; p; p=p->next) {
1590 if(p->com->type == comm_tcp_accept &&
1591 p->com->tcp_free != NULL) {
1592 comm_point_start_listening(p->com, -1, -1);
/**
 * Allocate a tcp_req_info structure for a stream.
 * The structure is zeroed and its spool_buffer is set to the buffer that
 * is passed in; the buffer itself is not copied.
 * @param spoolbuf: buffer that is referenced as the spool buffer.
 * NOTE(review): presumably returns the new structure, or NULL after the
 * malloc failure has been logged - confirm.
 */
1597 struct tcp_req_info*
1598 tcp_req_info_create(struct sldns_buffer* spoolbuf)
1600 struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
1602 log_err("malloc failure for new stream outoforder processing structure");
/* start from an all zero structure: empty lists and zero counters */
1605 memset(req, 0, sizeof(*req));
1606 req->spool_buffer = spoolbuf;
/**
 * Delete a tcp_req_info structure.
 * The open and done lists are emptied with tcp_req_info_clear(). The
 * comm point (cp) and the spool buffer are not deleted here, see the
 * comments below.
 * @param req: the structure to delete.
 */
1611 tcp_req_info_delete(struct tcp_req_info* req)
1614 tcp_req_info_clear(req);
1615 /* cp is pointer back to commpoint that owns this struct and
1616 * called delete on us */
1617 /* spool_buffer is shared udp buffer, not deleted here */
/**
 * Clear the state of a tcp_req_info, so that it has no open requests
 * and no pending results.
 * For the open requests the reply entry is removed from the mesh state.
 * For the pending results the global stream_wait_count is lowered, with
 * stream_wait_count_lock held. Afterwards both lists are empty, the
 * counters are zero and read_is_closed is reset.
 * @param req: the structure to clear.
 */
1621 void tcp_req_info_clear(struct tcp_req_info* req)
1623 struct tcp_req_open_item* open, *nopen;
1624 struct tcp_req_done_item* item, *nitem;
1627 /* free outstanding request mesh reply entries */
1628 open = req->open_req_list;
1631 mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
1635 req->open_req_list = NULL;
1636 req->num_open_req = 0;
1638 /* free pending writable result packets */
1639 item = req->done_req_list;
1642 lock_basic_lock(&stream_wait_count_lock);
1643 stream_wait_count -= (sizeof(struct tcp_req_done_item)
1645 lock_basic_unlock(&stream_wait_count_lock);
1650 req->done_req_list = NULL;
1651 req->num_done_req = 0;
1652 req->read_is_closed = 0;
/**
 * Remove the entry for a mesh state from the list of open requests.
 * The entry whose mesh_state equals m is unlinked from open_req_list and
 * num_open_req is decreased. The reply entry inside the mesh state is
 * not touched here, that is left to the caller.
 * Does nothing when req or m is NULL.
 * @param req: the stream request info.
 * @param m: the mesh state to look for.
 */
1656 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
1658 struct tcp_req_open_item* open, *prev = NULL;
1659 if(!req || !m) return;
1660 open = req->open_req_list;
1662 if(open->mesh_state == m) {
1663 struct tcp_req_open_item* next;
/* unlink: via the previous entry, or at the head of the list */
1664 if(prev) prev->next = open->next;
1665 else req->open_req_list = open->next;
1666 /* caller has to manage the mesh state reply entry */
1669 req->num_open_req --;
1680 /**
 * Setup listening for read or write.
 * Nothing is changed while a message is partially transferred
 * (tcp_byte_count is not zero). Otherwise the comm point is switched by
 * setting tcp_is_reading and restarting the listening with the tcp
 * timeout of the comm point.
 * New requests are only wanted while the number of open plus done
 * requests is below TCP_MAX_REQ_SIMULTANEOUS and the read side has not
 * been closed.
 * NOTE(review): the branch structure that selects between the three
 * restart sequences below is not fully visible; presumably they are the
 * write case, the read case and the case with neither - confirm.
 * @param req: the stream request info, its cp is reconfigured.
 */
1682 tcp_req_info_setup_listen(struct tcp_req_info* req)
1687 if(req->cp->tcp_byte_count != 0) {
1688 /* cannot change, halfway through */
1692 if(!req->cp->tcp_is_reading)
1694 if(req->num_open_req + req->num_done_req < TCP_MAX_REQ_SIMULTANEOUS &&
1695 !req->read_is_closed)
/* switch the comm point to writing */
1699 req->cp->tcp_is_reading = 0;
1700 comm_point_stop_listening(req->cp);
1701 comm_point_start_listening(req->cp, -1,
1702 req->cp->tcp_timeout_msec);
/* switch the comm point to reading */
1704 req->cp->tcp_is_reading = 1;
1705 comm_point_stop_listening(req->cp);
1706 comm_point_start_listening(req->cp, -1,
1707 req->cp->tcp_timeout_msec);
1708 /* and also read it (from SSL stack buffers), so
1709 * no read event is expected since the remainder of
1710 * the TLS frame is sitting in the buffers. */
1711 req->read_again = 1;
1713 comm_point_stop_listening(req->cp);
1714 comm_point_start_listening(req->cp, -1,
1715 req->cp->tcp_timeout_msec);
1716 comm_point_listen_for_rw(req->cp, 0, 0);
1720 /**
 * Remove first item from list of pending results.
 * The list must not be empty, this is asserted. The size of the item
 * (structure plus item->len) is subtracted from the global
 * stream_wait_count with stream_wait_count_lock held, then the list
 * head moves to the next item and num_done_req is decreased.
 * @param req: the stream request info.
 * NOTE(review): presumably returns the removed item, the caller in
 * tcp_req_pickup_next_result uses the result as such - confirm.
 */
1721 static struct tcp_req_done_item*
1722 tcp_req_info_pop_done(struct tcp_req_info* req)
1724 struct tcp_req_done_item* item;
1725 log_assert(req->num_done_req > 0 && req->done_req_list);
1726 item = req->done_req_list;
1727 lock_basic_lock(&stream_wait_count_lock);
1728 stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
1729 lock_basic_unlock(&stream_wait_count_lock);
1730 req->done_req_list = req->done_req_list->next;
1731 req->num_done_req --;
1735 /**
 * Send given buffer and setup to write.
 * The bytes are copied into the buffer of the comm point, which is
 * cleared first and flipped afterwards, and the comm point is marked as
 * writing. Restarting the listening is left to the callers.
 * @param req: the stream request info, req->cp->buffer is overwritten.
 * @param buf: the bytes to send.
 */
1737 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
1740 sldns_buffer_clear(req->cp->buffer);
1741 sldns_buffer_write(req->cp->buffer, buf, len);
1742 sldns_buffer_flip(req->cp->buffer);
1744 req->cp->tcp_is_reading = 0; /* we are now writing */
1747 /**
 * Pick up the next result and start writing it to the channel.
 * Does nothing when there are no pending results. Otherwise the first
 * pending result is popped and copied into the comm point buffer by
 * tcp_req_info_start_write_buf().
 * NOTE(review): the release of the popped item is not visible here,
 * presumably it is freed after the copy - confirm.
 * @param req: the stream request info.
 */
1749 tcp_req_pickup_next_result(struct tcp_req_info* req)
1751 if(req->num_done_req > 0) {
1752 /* unlist the done item from the list of pending results */
1753 struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
1754 tcp_req_info_start_write_buf(req, item->buf, item->len);
1760 /**
 * The read channel has closed.
 * The byte count of a partial read is reset. If results are pending,
 * the next one is picked up for writing. If nothing is open and nothing
 * is pending the connection can be closed. Otherwise read_is_closed is
 * set and the stream waits, with a timeout, for the outstanding
 * requests.
 * NOTE(review): the value handed back to the caller in each of these
 * cases is not visible here - confirm.
 * @param req: the stream request info.
 */
1762 tcp_req_info_handle_read_close(struct tcp_req_info* req)
1764 verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
1765 /* reset byte count for (potential) partial read */
1766 req->cp->tcp_byte_count = 0;
1767 /* if we still have results to write, pick up next and write it */
1768 if(req->num_done_req != 0) {
1769 tcp_req_pickup_next_result(req);
1770 tcp_req_info_setup_listen(req);
1773 /* if nothing to do, this closes the connection */
1774 if(req->num_open_req == 0 && req->num_done_req == 0)
1776 /* otherwise, we must be waiting for dns resolve, wait with timeout */
1777 req->read_is_closed = 1;
1778 tcp_req_info_setup_listen(req);
/**
 * Handle that a write on the stream has finished.
 * The comm point buffer is cleared. When no results are pending and the
 * read side was already closed, the reply is dropped with
 * comm_point_drop_reply(), which closes the stream. Otherwise the comm
 * point goes back to reading, a next pending result is picked up if
 * there is one, and the listening is set up again.
 * @param req: the stream request info.
 */
1783 tcp_req_info_handle_writedone(struct tcp_req_info* req)
1785 /* back to reading state, we finished this write event */
1786 sldns_buffer_clear(req->cp->buffer);
1787 if(req->num_done_req == 0 && req->read_is_closed) {
1788 /* no more to write and nothing to read, close it */
1789 comm_point_drop_reply(&req->cp->repinfo);
1792 req->cp->tcp_is_reading = 1;
1793 /* see if another result needs writing */
1794 tcp_req_pickup_next_result(req);
1796 /* see if there is more to write, if not stop_listening for writing */
1797 /* see if new requests are allowed, if so, start_listening
1799 tcp_req_info_setup_listen(req);
/**
 * Handle that a query has been read from the stream.
 * The callback of the comm point is called with in_worker_handle set.
 * When the callback returns nonzero, the answer is in c->buffer and the
 * comm point is switched to writing. Otherwise the query is expected to
 * wait in the mesh; then the buffer is cleared, a pending answer is
 * picked up if there is one, and the listening is set up again.
 * NOTE(review): the checks between these steps, such as the test for a
 * dropped reply, are not visible here - confirm against the complete
 * definition.
 * @param req: the stream request info.
 */
1803 tcp_req_info_handle_readdone(struct tcp_req_info* req)
1805 struct comm_point* c = req->cp;
1807 /* we want to read up several requests, unless there are
1808 * pending answers */
/* mark that the worker handler runs, tcp_req_info_send_reply() tests
 * this flag to copy the reply into the comm point buffer directly */
1812 req->in_worker_handle = 1;
1813 sldns_buffer_set_limit(req->spool_buffer, 0);
1814 /* handle the current request */
1815 /* this calls the worker handle request routine that could give
1816 * a cache response, or localdata response, or drop the reply,
1817 * or schedule a mesh entry for later */
1818 fptr_ok(fptr_whitelist_comm_point(c->callback));
1819 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
1820 req->in_worker_handle = 0;
1821 /* there is an answer, put it up. It is already in the
1822 * c->buffer, just send it. */
1823 /* since we were just reading a query, the channel is
1824 * clear to write to */
1826 c->tcp_is_reading = 0;
1827 comm_point_stop_listening(c);
1828 comm_point_start_listening(c, -1, c->tcp_timeout_msec);
1831 req->in_worker_handle = 0;
1832 /* it should be waiting in the mesh for recursion.
1833 * If mesh failed to add a new entry and called commpoint_drop_reply.
1834 * Then the mesh state has been cleared. */
1836 /* the reply has been dropped, stream has been closed. */
1839 /* If mesh failed(mallocfail) and called commpoint_send_reply with
1840 * something like servfail then we pick up that reply below. */
1845 sldns_buffer_clear(c->buffer);
1846 /* if pending answers, pick up an answer and start sending it */
1847 tcp_req_pickup_next_result(req);
1849 /* if answers pending, start sending answers */
1850 /* read more requests if we can have more requests */
1851 tcp_req_info_setup_listen(req);
/**
 * Add a mesh state to the list of open requests of the stream.
 * A new list item is allocated, it refers to the mesh state m, and it
 * is inserted at the front of open_req_list; num_open_req is increased.
 * All three arguments must be non-NULL, this is asserted.
 * @param req: the stream request info.
 * @param mesh: the mesh area.
 * @param m: the mesh state that works on the request.
 * NOTE(review): the handling of a failed malloc and the return value
 * are not visible here - confirm.
 */
1855 tcp_req_info_add_meshstate(struct tcp_req_info* req,
1856 struct mesh_area* mesh, struct mesh_state* m)
1858 struct tcp_req_open_item* item;
1859 log_assert(req && mesh && m);
1860 item = (struct tcp_req_open_item*)malloc(sizeof(*item));
/* insert at the front of the list */
1862 item->next = req->open_req_list;
1864 item->mesh_state = m;
1865 req->open_req_list = item;
1866 req->num_open_req++;
1870 /**
 * Add a result to the result list. At the end.
 * The space for the item (structure plus len bytes) is first accounted
 * in the global stream_wait_count, with stream_wait_count_lock held;
 * when that would exceed stream_wait_max the reply is dropped. Then a
 * copy of the bytes is made with memdup() and the new item is appended
 * after the last item of done_req_list; num_done_req is increased.
 * @param req: the stream request info.
 * @param buf: the result bytes, they are copied.
 * @param len: number of bytes in buf.
 * NOTE(review): the caller in tcp_req_info_send_reply treats a zero
 * result as failure; presumably the accounted space is given back on
 * the malloc failure paths - confirm.
 */
1872 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len)
1874 struct tcp_req_done_item* last = NULL;
1875 struct tcp_req_done_item* item;
1878 /* see if we have space */
1879 space = sizeof(struct tcp_req_done_item) + len;
1880 lock_basic_lock(&stream_wait_count_lock);
1881 if(stream_wait_count + space > stream_wait_max) {
1882 lock_basic_unlock(&stream_wait_count_lock);
1883 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size");
/* reserve the space while the lock is still held */
1886 stream_wait_count += space;
1887 lock_basic_unlock(&stream_wait_count_lock);
1889 /* find last element */
1890 last = req->done_req_list;
1891 while(last && last->next)
1894 /* create new element */
1895 item = (struct tcp_req_done_item*)malloc(sizeof(*item));
1897 log_err("malloc failure, for stream result list");
/* the item gets its own copy of the result bytes */
1902 item->buf = memdup(buf, len);
1905 log_err("malloc failure, adding reply to stream result list");
/* append after the last item, or start the list when it was empty */
1910 if(last) last->next = item;
1911 else req->done_req_list = item;
1912 req->num_done_req++;
/**
 * Send the reply that is in the spool buffer over the stream.
 * Three cases are visible:
 * - called while the worker handler runs (in_worker_handle): the spool
 *   buffer contents are copied into the comm point buffer;
 * - the comm point is reading and not halfway through a message: the
 *   reply is written to the comm point buffer and the comm point is
 *   switched to writing;
 * - otherwise the reply is queued behind the pending results; when that
 *   fails the connection is dropped.
 * NOTE(review): presumably the function returns after each of the first
 * two cases - confirm against the complete definition.
 * @param req: the stream request info, with the reply in spool_buffer.
 */
1917 tcp_req_info_send_reply(struct tcp_req_info* req)
1919 if(req->in_worker_handle) {
1920 /* reply from mesh is in the spool_buffer */
1921 /* copy now, so that the spool buffer is free for other tasks
1922 * before the callback is done */
1923 sldns_buffer_clear(req->cp->buffer);
1924 sldns_buffer_write(req->cp->buffer,
1925 sldns_buffer_begin(req->spool_buffer),
1926 sldns_buffer_limit(req->spool_buffer));
1927 sldns_buffer_flip(req->cp->buffer);
1931 /* now that the query has been handled, that mesh_reply entry
1932 * should be removed, from the tcp_req_info list,
1933 * the mesh state cleanup removes then with region_cleanup and
1934 * replies_sent true. */
1935 /* see if we can send it straight away (we are not doing
1936 * anything else). If so, copy to buffer and start */
1937 if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) {
1938 /* buffer is free, and was ready to read new query into,
1939 * but we are now going to use it to send this answer */
1940 tcp_req_info_start_write_buf(req,
1941 sldns_buffer_begin(req->spool_buffer),
1942 sldns_buffer_limit(req->spool_buffer));
1943 /* switch to listen to write events */
1944 comm_point_stop_listening(req->cp);
1945 comm_point_start_listening(req->cp, -1,
1946 req->cp->tcp_timeout_msec);
1949 /* queue up the answer behind the others already pending */
1950 if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer),
1951 sldns_buffer_limit(req->spool_buffer))) {
1952 /* drop the connection, we are out of resources */
1953 comm_point_drop_reply(&req->cp->repinfo);
/**
 * Get the number of bytes accounted in stream_wait_count.
 * When the lock has not been initialised the counter is returned
 * without locking; otherwise the counter is read into s while
 * stream_wait_count_lock is held.
 * NOTE(review): presumably s is returned after the unlock - confirm.
 */
1957 size_t tcp_req_info_get_stream_buffer_size(void)
1960 if(!stream_wait_lock_inited)
1961 return stream_wait_count;
1962 lock_basic_lock(&stream_wait_count_lock);
1963 s = stream_wait_count;
1964 lock_basic_unlock(&stream_wait_count_lock);