2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
6 * This software is open source.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 * This file has functions to get queries from clients.
42 #ifdef HAVE_SYS_TYPES_H
43 # include <sys/types.h>
47 #ifdef USE_TCP_FASTOPEN
48 #include <netinet/tcp.h>
50 #include "services/listen_dnsport.h"
51 #include "services/outside_network.h"
52 #include "util/netevent.h"
54 #include "util/config_file.h"
55 #include "util/net_help.h"
56 #include "sldns/sbuffer.h"
57 #include "sldns/parseutil.h"
58 #include "services/mesh.h"
59 #include "util/fptr_wlist.h"
60 #include "util/locks.h"
72 #include <systemd/sd-daemon.h>
82 /** number of queued connections passed as backlog to listen(); used for
 * both the TCP accept sockets and the local (unix) stream sockets */
83 #define TCP_BACKLOG 256
85 #ifndef THREADS_DISABLED
86 /** lock on the counter of stream buffer memory */
87 static lock_basic_type stream_wait_count_lock;
88 /** lock on the counter of HTTP2 query buffer memory */
89 static lock_basic_type http2_query_buffer_count_lock;
90 /** lock on the counter of HTTP2 response buffer memory */
91 static lock_basic_type http2_response_buffer_count_lock;
93 /** size (in bytes) of stream wait buffers */
94 static size_t stream_wait_count = 0;
95 /** is the lock initialised for stream wait buffers; set to 1 by
 * listen_create() after it calls lock_basic_init() on the lock */
96 static int stream_wait_lock_inited = 0;
97 /** size (in bytes) of HTTP2 query buffers */
98 static size_t http2_query_buffer_count = 0;
99 /** is the lock initialised for HTTP2 query buffers; set to 1 by
 * listen_create() after it calls lock_basic_init() on the lock */
100 static int http2_query_buffer_lock_inited = 0;
101 /** size (in bytes) of HTTP2 response buffers */
102 static size_t http2_response_buffer_count = 0;
103 /** is the lock initialised for HTTP2 response buffers; set to 1 by
 * listen_create() after it calls lock_basic_init() on the lock */
104 static int http2_response_buffer_lock_inited = 0;
107 * Debug print of the getaddrinfo returned address.
108 * @param addr: the address returned.
/*
 * Debug helper: when verbosity is at least VERB_ALGO, log one line that
 * describes a getaddrinfo() result:
 *   - socket type: "udp" (SOCK_DGRAM), "tcp" (SOCK_STREAM) or "otherproto";
 *   - address family: "4" (AF_INET) or "6" (AF_INET6);
 *   - the address converted to text with inet_ntop(), or "(null)" when
 *     that conversion fails;
 *   - the port, read through a sockaddr_in cast for both families and
 *     converted with ntohs().
 * Nothing is logged below VERB_ALGO.
 * NOTE(review): the declaration of buf and some lines of the format
 * arguments are not visible in this listing.
 */
111 verbose_print_addr(struct addrinfo *addr)
113 if(verbosity >= VERB_ALGO) {
115 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
/* for AF_INET6 the address is taken from the sockaddr_in6 instead */
117 if(addr->ai_family == AF_INET6)
118 sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
121 if(inet_ntop(addr->ai_family, sinaddr, buf,
122 (socklen_t)sizeof(buf)) == 0) {
123 (void)strlcpy(buf, "(null)", sizeof(buf));
/* force termination of the text before it is logged */
125 buf[sizeof(buf)-1] = 0;
126 verbose(VERB_ALGO, "creating %s%s socket %s %d",
127 addr->ai_socktype==SOCK_DGRAM?"udp":
128 addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
129 addr->ai_family==AF_INET?"4":
130 addr->ai_family==AF_INET6?"6":
132 ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
/*
 * Debug helper: when verbosity is at least VERB_ALGO, dump an
 * unbound_socket. Prints the stored address through verbose_print_addr()
 * and then logs the descriptor (s) and the family (fam).
 * Any family other than AF_INET is printed as "AF_INET6".
 * @param ub_sock: the socket structure to print; ub_sock->addr is
 *	dereferenced by verbose_print_addr(), so it must be set.
 */
137 verbose_print_unbound_socket(struct unbound_socket* ub_sock)
139 if(verbosity >= VERB_ALGO) {
140 log_info("listing of unbound_socket structure:");
141 verbose_print_addr(ub_sock->addr);
142 log_info("s is: %d, fam is: %s", ub_sock->s, ub_sock->fam == AF_INET?"AF_INET":"AF_INET6");
/*
 * Look for a socket that systemd socket activation passed to this process.
 * Steps visible here:
 *   1. sd_booted() must report a running systemd, otherwise a warning
 *      (result 0) or an error (negative result) is logged.
 *   2. The LISTEN_PID and LISTEN_FDS environment variables are read; a
 *      warning is logged for each one that is reported as not defined.
 *   3. sd_listen_fds(0) gives the number of passed descriptors; less than
 *      one is logged as a warning (0) or an error (negative).
 *   4. Descriptors SD_LISTEN_FDS_START .. SD_LISTEN_FDS_START+r-1 are
 *      tested with sd_is_socket() against family, socktype and listen.
 *      A matching descriptor is stored in s.
 *   5. Failure is logged with the address (addr, addrlen) or with path.
 * @param family: address family to match.
 * @param socktype: socket type to match.
 * @param listen: listening state to match, see the comment in the body;
 *	callers in this file pass -1 for UDP and 1 for stream sockets.
 * @param addr: address used in the error message (may be NULL, the local
 *	socket caller passes NULL with addrlen 0).
 * @param addrlen: length of addr.
 * The last parameter (declared on a line not shown here) is used as
 * `path` in the final error message.
 * NOTE(review): the return statements are not visible in this listing;
 * callers in this file compare the result against -1.
 */
148 systemd_get_activated(int family, int socktype, int listen,
149 struct sockaddr *addr, socklen_t addrlen,
155 const char* listen_pid, *listen_fds;
157 /* We should use "listen" option only for stream protocols. For UDP it should be -1 */
159 if((r = sd_booted()) < 1) {
161 log_warn("systemd is not running");
163 log_err("systemd sd_booted(): %s", strerror(-r));
167 listen_pid = getenv("LISTEN_PID");
168 listen_fds = getenv("LISTEN_FDS");
171 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID");
176 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS");
180 if((r = sd_listen_fds(0)) < 1) {
182 log_warn("systemd: did not return socket, check unit configuration");
184 log_err("systemd sd_listen_fds(): %s", strerror(-r));
/* descriptors passed by systemd start at SD_LISTEN_FDS_START */
188 for(i = 0; i < r; i++) {
189 if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) {
190 s = SD_LISTEN_FDS_START + i;
196 log_err_addr("systemd sd_listen_fds()",
198 (struct sockaddr_storage *)addr, addrlen);
200 log_err("systemd sd_listen_fds(): %s", path);
/*
 * Create (or obtain from systemd) a UDP socket, configure it and bind it.
 * Order of work visible in this listing:
 *   1. get a descriptor from systemd_get_activated(), or create one with
 *      socket(family, socktype, 0);
 *   2. SO_REUSEADDR, then SO_REUSEPORT_LB or SO_REUSEPORT when reuseport
 *      is non-NULL and *reuseport is true (failure is only a warning);
 *   3. IP_TRANSPARENT / IP_BINDANY / SO_BINDANY and IP_FREEBIND, whichever
 *      the platform defines (failure is only a warning);
 *   4. receive buffer first, then send buffer: the FORCE variant where it
 *      exists, the plain option otherwise, and a warning when the kernel
 *      granted less than half of the request;
 *   5. the DiffServ code point through set_ip_dscp();
 *   6. IPv6: IPV6_V6ONLY, IPV6_USE_MIN_MTU or IPV6_MTU, and
 *      IPV6_MTU_DISCOVER; IPv4: IP_MTU_DISCOVER or IP_DONTFRAG;
 *   7. bind(), skipped for a descriptor that came from systemd;
 *   8. fd_set_nonblock().
 * @param family: address family for socket() and option selection.
 * @param socktype: socket type for socket().
 * @param addr: address to bind to.
 * @param addrlen: length of addr.
 * @param v6only: the value 2 makes the IPV6_V6ONLY option value 0, other
 *	values make it 1. NOTE(review): the condition that guards this
 *	setsockopt is not visible here.
 * @param inuse: on bind failure set to whether errno was EADDRINUSE.
 * @param noproto: NOTE(review): assignments are not visible here;
 *	presumably set when the protocol family is not supported.
 * @param rcv: requested receive buffer size.
 * @param snd: requested send buffer size.
 * @param listen: when zero, EACCES and EADDRNOTAVAIL bind failures are not
 *	logged unless verbosity is 4 or higher.
 * @param reuseport: see step 2.
 * @param transparent: NOTE(review): presumably gates step 3, the test
 *	itself is not visible here.
 * @param freebind: NOTE(review): presumably gates IP_FREEBIND, the test
 *	itself is not visible here.
 * @param use_systemd: NOTE(review): presumably gates step 1, only part of
 *	that condition is visible here.
 * @param dscp: DiffServ code point handed to set_ip_dscp().
 * NOTE(review): return statements and error cleanup are not visible;
 * make_sock() treats a result of -1 as failure.
 */
207 create_udp_sock(int family, int socktype, struct sockaddr* addr,
208 socklen_t addrlen, int v6only, int* inuse, int* noproto,
209 int rcv, int snd, int listen, int* reuseport, int transparent,
210 int freebind, int use_systemd, int dscp)
214 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY)
218 int mtu = IPV6_MIN_MTU;
220 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
223 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
229 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
232 #if !defined(IP_FREEBIND)
/* becomes 1 when the descriptor was handed over by systemd; such a
 * descriptor is not passed to bind() further down */
236 int got_fd_from_systemd = 0;
240 && (s = systemd_get_activated(family, socktype, -1, addr,
241 addrlen, NULL)) == -1)) {
245 if((s = socket(family, socktype, 0)) == -1) {
248 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
253 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
254 WSAGetLastError() == WSAEPROTONOSUPPORT) {
259 log_err("can't create socket: %s", sock_strerror(errno));
265 got_fd_from_systemd = 1;
270 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
271 (socklen_t)sizeof(on)) < 0) {
272 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
273 sock_strerror(errno));
275 if(errno != ENOSYS) {
288 #endif /* SO_REUSEADDR */
290 # ifdef SO_REUSEPORT_LB
291 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance
292 * like SO_REUSEPORT on Linux. This is what the users want
293 * with the config option in unbound.conf; if we actually
294 * need local address and port reuse they'll also need to
295 * have SO_REUSEPORT set for them, assume it was _LB they want.
297 if (reuseport && *reuseport &&
298 setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on,
299 (socklen_t)sizeof(on)) < 0) {
301 if(errno != ENOPROTOOPT || verbosity >= 3)
302 log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s",
305 /* this option is not essential, we can continue */
308 # else /* no SO_REUSEPORT_LB */
310 /* try to set SO_REUSEPORT so that incoming
311 * queries are distributed evenly among the receiving threads.
312 * Each thread must have its own socket bound to the same port,
313 * with SO_REUSEPORT set on each socket.
315 if (reuseport && *reuseport &&
316 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
317 (socklen_t)sizeof(on)) < 0) {
319 if(errno != ENOPROTOOPT || verbosity >= 3)
320 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
323 /* this option is not essential, we can continue */
326 # endif /* SO_REUSEPORT_LB */
329 #endif /* defined(SO_REUSEPORT) */
330 #ifdef IP_TRANSPARENT
332 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
333 (socklen_t)sizeof(on)) < 0) {
334 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
337 #elif defined(IP_BINDANY)
339 setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
340 (family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
341 (void*)&on, (socklen_t)sizeof(on)) < 0) {
342 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
343 (family==AF_INET6?"V6":""), strerror(errno));
345 #elif defined(SO_BINDANY)
347 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on,
348 (socklen_t)sizeof(on)) < 0) {
349 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
352 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
356 setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
357 (socklen_t)sizeof(on)) < 0) {
358 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
361 #endif /* IP_FREEBIND */
365 socklen_t slen = (socklen_t)sizeof(got);
366 # ifdef SO_RCVBUFFORCE
367 /* Linux specific: try to use root permission to override
368 * system limits on rcvbuf. The limit is stored in
369 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
370 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
371 (socklen_t)sizeof(rcv)) < 0) {
373 log_err("setsockopt(..., SO_RCVBUFFORCE, "
374 "...) failed: %s", sock_strerror(errno));
380 # endif /* SO_RCVBUFFORCE */
381 if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
382 (socklen_t)sizeof(rcv)) < 0) {
383 log_err("setsockopt(..., SO_RCVBUF, "
384 "...) failed: %s", sock_strerror(errno));
390 /* check if we got the right thing or if system
391 * reduced to some system max. Warn if so */
392 if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
393 &slen) >= 0 && got < rcv/2) {
394 log_warn("so-rcvbuf %u was not granted. "
395 "Got %u. To fix: start with "
396 "root permissions(linux) or sysctl "
397 "bigger net.core.rmem_max(linux) or "
398 "kern.ipc.maxsockbuf(bsd) values.",
399 (unsigned)rcv, (unsigned)got);
401 # ifdef SO_RCVBUFFORCE
404 #endif /* SO_RCVBUF */
406 /* first do RCVBUF as the receive buffer is more important */
410 socklen_t slen = (socklen_t)sizeof(got);
411 # ifdef SO_SNDBUFFORCE
412 /* Linux specific: try to use root permission to override
413 * system limits on sndbuf. The limit is stored in
414 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
415 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
416 (socklen_t)sizeof(snd)) < 0) {
418 log_err("setsockopt(..., SO_SNDBUFFORCE, "
419 "...) failed: %s", sock_strerror(errno));
425 # endif /* SO_SNDBUFFORCE */
426 if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
427 (socklen_t)sizeof(snd)) < 0) {
428 log_err("setsockopt(..., SO_SNDBUF, "
429 "...) failed: %s", sock_strerror(errno));
435 /* check if we got the right thing or if system
436 * reduced to some system max. Warn if so */
437 if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
438 &slen) >= 0 && got < snd/2) {
439 log_warn("so-sndbuf %u was not granted. "
440 "Got %u. To fix: start with "
441 "root permissions(linux) or sysctl "
442 "bigger net.core.wmem_max(linux) or "
443 "kern.ipc.maxsockbuf(bsd) values.",
444 (unsigned)snd, (unsigned)got);
446 # ifdef SO_SNDBUFFORCE
449 #endif /* SO_SNDBUF */
/* apply the DiffServ code point; a failure is reported as a warning */
451 err = set_ip_dscp(s, family, dscp);
453 log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err);
454 if(family == AF_INET6) {
455 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
459 # if defined(IPV6_V6ONLY)
461 int val=(v6only==2)?0:1;
462 if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
463 (void*)&val, (socklen_t)sizeof(val)) < 0) {
464 log_err("setsockopt(..., IPV6_V6ONLY"
465 ", ...) failed: %s", sock_strerror(errno));
473 # if defined(IPV6_USE_MIN_MTU)
475 * There is no fragmentation of IPv6 datagrams
476 * during forwarding in the network. Therefore
477 * we do not send UDP datagrams larger than
478 * the minimum IPv6 MTU of 1280 octets. The
479 * EDNS0 message length can be larger if the
480 * network stack supports IPV6_USE_MIN_MTU.
482 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
483 (void*)&on, (socklen_t)sizeof(on)) < 0) {
484 log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
485 "...) failed: %s", sock_strerror(errno));
491 # elif defined(IPV6_MTU)
493 * On Linux, to send no larger than 1280, the PMTUD is
494 * disabled by default for datagrams anyway, so we set
497 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
498 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
499 log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
500 sock_strerror(errno));
506 # endif /* IPv6 MTU */
507 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
508 # if defined(IP_PMTUDISC_OMIT)
509 action = IP_PMTUDISC_OMIT;
510 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
511 &action, (socklen_t)sizeof(action)) < 0) {
513 if (errno != EINVAL) {
514 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
527 if (omit6_set == 0) {
528 action = IP_PMTUDISC_DONT;
529 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
530 &action, (socklen_t)sizeof(action)) < 0) {
531 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
539 # endif /* IPV6_MTU_DISCOVER */
540 } else if(family == AF_INET) {
541 # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
542 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
543 * PMTU information is not accepted, but fragmentation is allowed
544 * if and only if the packet size exceeds the outgoing interface MTU
545 * (and also uses the interface mtu to determine the size of the packets).
546 * So there won't be any EMSGSIZE error. Against DNS fragmentation attacks.
547 * FreeBSD already has same semantics without setting the option. */
550 # if defined(IP_PMTUDISC_OMIT)
551 action = IP_PMTUDISC_OMIT;
552 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
553 &action, (socklen_t)sizeof(action)) < 0) {
555 if (errno != EINVAL) {
556 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
570 action = IP_PMTUDISC_DONT;
571 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
572 &action, (socklen_t)sizeof(action)) < 0) {
573 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
581 # elif defined(IP_DONTFRAG) && !defined(__APPLE__)
582 /* the IP_DONTFRAG option is defined in the 11.0 OSX headers,
583 * but does not work on that version, so we exclude it */
585 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
586 &off, (socklen_t)sizeof(off)) < 0) {
587 log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
594 # endif /* IPv4 MTU */
598 !got_fd_from_systemd &&
600 bind(s, (struct sockaddr*)addr, addrlen) != 0) {
605 *inuse = (errno == EADDRINUSE);
606 /* detect freebsd jail with no ipv6 permission */
607 if(family==AF_INET6 && errno==EINVAL)
609 else if(errno != EADDRINUSE &&
610 !(errno == EACCES && verbosity < 4 && !listen)
612 && !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen)
615 log_err_addr("can't bind socket", strerror(errno),
616 (struct sockaddr_storage*)addr, addrlen);
618 #endif /* EADDRINUSE */
619 #else /* USE_WINSOCK */
620 if(WSAGetLastError() != WSAEADDRINUSE &&
621 WSAGetLastError() != WSAEADDRNOTAVAIL &&
622 !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) {
623 log_err_addr("can't bind socket",
624 wsa_strerror(WSAGetLastError()),
625 (struct sockaddr_storage*)addr, addrlen);
627 #endif /* USE_WINSOCK */
/* last step: switch the descriptor to non-blocking mode */
631 if(!fd_set_nonblock(s)) {
/*
 * Create (or obtain from systemd) a TCP accept socket for the address in
 * addr, configure it, bind it and start listening.
 * Order of work visible in this listing:
 *   1. verbose_print_addr(addr);
 *   2. get a descriptor from systemd_get_activated() (listen argument 1),
 *      or create one with socket();
 *   3. TCP_NODELAY and TCP_MAXSEG where the platform defines them, with a
 *      warning where it does not;
 *   4. SO_REUSEADDR, IP_FREEBIND (when freebind is set), SO_REUSEPORT
 *      (when reuseport is non-NULL and *reuseport is true), IPV6_V6ONLY
 *      (for AF_INET6 when v6only is set), and IP_TRANSPARENT /
 *      IP_BINDANY / SO_BINDANY;
 *   5. the DiffServ code point through set_ip_dscp();
 *   6. bind(), skipped for a descriptor that came from systemd;
 *   7. fd_set_nonblock() and listen() with TCP_BACKLOG;
 *   8. with USE_TCP_FASTOPEN, the TCP_FASTOPEN option with queue length
 *      qlen.
 * @param addr: getaddrinfo result giving family, socket type and address.
 * @param v6only: when nonzero and the family is AF_INET6, IPV6_V6ONLY is
 *	switched on.
 * @param noproto: NOTE(review): assignments are not visible here;
 *	presumably set when the protocol family is not supported.
 * @param reuseport: see step 4.
 * @param transparent: NOTE(review): presumably gates the transparent /
 *	bindany options, the test itself is not visible here.
 * @param mss: value for TCP_MAXSEG. NOTE(review): the condition that
 *	guards this setsockopt is not visible here.
 * @param nodelay: NOTE(review): presumably gates TCP_NODELAY, the test
 *	itself is not visible here.
 * @param freebind: when nonzero IP_FREEBIND is set; failure is a warning.
 * @param use_systemd: NOTE(review): presumably gates step 2, only part of
 *	that condition is visible here.
 * @param dscp: DiffServ code point handed to set_ip_dscp().
 * NOTE(review): return statements and error cleanup are not visible;
 * make_sock() treats a result of -1 as failure.
 */
641 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
642 int* reuseport, int transparent, int mss, int nodelay, int freebind,
643 int use_systemd, int dscp)
647 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY)
651 int got_fd_from_systemd = 0;
653 #ifdef USE_TCP_FASTOPEN
656 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
659 #if !defined(IP_FREEBIND)
662 verbose_print_addr(addr);
667 && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
668 addr->ai_addr, addr->ai_addrlen,
673 if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
675 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
680 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
681 WSAGetLastError() == WSAEPROTONOSUPPORT) {
686 log_err("can't create socket: %s", sock_strerror(errno));
690 #if defined(IPPROTO_TCP) && defined(TCP_NODELAY)
691 if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on,
692 (socklen_t)sizeof(on)) < 0) {
694 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
697 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
698 wsa_strerror(WSAGetLastError()));
702 log_warn(" setsockopt(TCP_NODELAY) unsupported");
703 #endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */
706 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
707 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
708 (socklen_t)sizeof(mss)) < 0) {
709 log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
710 sock_strerror(errno));
713 " tcp socket mss set to %d", mss);
716 log_warn(" setsockopt(TCP_MAXSEG) unsupported");
717 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
721 got_fd_from_systemd = 1;
725 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
726 (socklen_t)sizeof(on)) < 0) {
727 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
728 sock_strerror(errno));
732 #endif /* SO_REUSEADDR */
734 if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
735 (socklen_t)sizeof(on)) < 0) {
736 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
739 #endif /* IP_FREEBIND */
741 /* try to set SO_REUSEPORT so that incoming
742 * connections are distributed evenly among the receiving threads.
743 * Each thread must have its own socket bound to the same port,
744 * with SO_REUSEPORT set on each socket.
746 if (reuseport && *reuseport &&
747 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
748 (socklen_t)sizeof(on)) < 0) {
750 if(errno != ENOPROTOOPT || verbosity >= 3)
751 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
754 /* this option is not essential, we can continue */
759 #endif /* defined(SO_REUSEPORT) */
760 #if defined(IPV6_V6ONLY)
761 if(addr->ai_family == AF_INET6 && v6only) {
762 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
763 (void*)&on, (socklen_t)sizeof(on)) < 0) {
764 log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
765 sock_strerror(errno));
772 #endif /* IPV6_V6ONLY */
773 #ifdef IP_TRANSPARENT
775 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
776 (socklen_t)sizeof(on)) < 0) {
777 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
780 #elif defined(IP_BINDANY)
782 setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
783 (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
784 (void*)&on, (socklen_t)sizeof(on)) < 0) {
785 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
786 (addr->ai_family==AF_INET6?"V6":""), strerror(errno));
788 #elif defined(SO_BINDANY)
790 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
792 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
795 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
/* apply the DiffServ code point; a failure is reported as a warning */
796 err = set_ip_dscp(s, addr->ai_family, dscp);
798 log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err);
801 !got_fd_from_systemd &&
803 bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
805 /* detect freebsd jail with no ipv6 permission */
806 if(addr->ai_family==AF_INET6 && errno==EINVAL)
809 log_err_addr("can't bind socket", strerror(errno),
810 (struct sockaddr_storage*)addr->ai_addr,
814 log_err_addr("can't bind socket",
815 wsa_strerror(WSAGetLastError()),
816 (struct sockaddr_storage*)addr->ai_addr,
822 if(!fd_set_nonblock(s)) {
/* start accepting, with TCP_BACKLOG as the listen() backlog */
826 if(listen(s, TCP_BACKLOG) == -1) {
827 log_err("can't listen: %s", sock_strerror(errno));
831 #ifdef USE_TCP_FASTOPEN
832 /* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
833 against IP spoofing attacks as suggested in RFC7413 */
835 /* OS X implementation only supports qlen of 1 via this call. Actual
836 value is configured by the net.inet.tcp.fastopen_backlog kernel parm. */
839 /* 5 is recommended on linux */
842 if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen,
843 sizeof(qlen))) == -1 ) {
845 /* squelch ENOPROTOOPT: freebsd server mode with kernel support
846 disabled, except when verbosity enabled for debugging */
847 if(errno != ENOPROTOOPT || verbosity >= 3) {
850 log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
852 log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
/*
 * Set the DiffServ code point on a socket.
 * The value ds is written with the IPV6_TCLASS option (IPPROTO_IPV6
 * level) or with the IP_TOS option (IPPROTO_IP level).
 * @param socket: descriptor to configure.
 * @param addrfamily: address family of the socket.
 *	NOTE(review): the test that selects between the two options is not
 *	visible in this listing; presumably AF_INET6 selects IPV6_TCLASS.
 * @param dscp: requested code point.
 *	NOTE(review): how ds is derived from dscp is not visible here.
 * @return on setsockopt() failure, the sock_strerror(errno) text.
 *	NOTE(review): the success return is not visible; callers in this
 *	file print the result with a string format when reporting an error.
 */
863 set_ip_dscp(int socket, int addrfamily, int dscp)
872 if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds, sizeof(ds)) < 0)
873 return sock_strerror(errno);
876 if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0)
877 return sock_strerror(errno);
/*
 * Create a listening local (AF_LOCAL, SOCK_STREAM) socket at path.
 * When use_systemd is set, a socket activated by systemd is looked up
 * first with systemd_get_activated(). Otherwise the steps visible here are:
 * fill a sockaddr_un with path, socket(), unlink() of an old socket file
 * (a missing file, ENOENT, is not an error), bind(), fd_set_nonblock() and
 * listen() with TCP_BACKLOG. Each failing step logs its own error.
 * A final branch logs "Local sockets are not supported".
 * @param path: filesystem path of the socket.
 * @param noproto: not used by the code path that casts it to void.
 * @param use_systemd: if true, try to fetch the socket from systemd.
 * NOTE(review): return statements, descriptor cleanup on error and the
 * preprocessor condition around the unsupported branch are not visible in
 * this listing.
 */
884 create_local_accept_sock(const char *path, int* noproto, int use_systemd)
889 if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1, NULL, 0, path)) != -1)
895 struct sockaddr_un usock;
900 verbose(VERB_ALGO, "creating unix socket %s", path);
901 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
902 /* this member exists on BSDs, not Linux */
903 usock.sun_len = (unsigned)sizeof(usock);
905 usock.sun_family = AF_LOCAL;
906 /* sun_path length is 92-108, 104 on FreeBSD; the copy is bounded by
 * sizeof(usock.sun_path) */
907 (void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));
909 if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
910 log_err("Cannot create local socket %s (%s)",
911 path, strerror(errno));
/* remove a socket file left at path; ENOENT is fine */
915 if (unlink(path) && errno != ENOENT) {
916 /* The socket already exists and cannot be removed */
917 log_err("Cannot remove old local socket %s (%s)",
918 path, strerror(errno));
922 if (bind(s, (struct sockaddr *)&usock,
923 (socklen_t)sizeof(struct sockaddr_un)) == -1) {
924 log_err("Cannot bind local socket %s (%s)",
925 path, strerror(errno));
929 if (!fd_set_nonblock(s)) {
930 log_err("Cannot set non-blocking mode");
934 if (listen(s, TCP_BACKLOG) == -1) {
935 log_err("can't listen: %s", strerror(errno));
939 (void)noproto; /*unused*/
952 log_err("Local sockets are not supported");
960 * Create socket from getaddrinfo results
/*
 * Resolve ifname and port with getaddrinfo() and create one socket from
 * the result.
 * hints->ai_socktype is overwritten with stype before the lookup.
 * For SOCK_DGRAM the socket comes from create_udp_sock() (rcv and snd are
 * cast to int, the listen argument is 1); "bind: address already in use"
 * is logged when that fails with inuse set. Any other stype goes to
 * create_tcp_accept_sock().
 * The getaddrinfo result is stored in ub_sock->addr and the hints family
 * in ub_sock->fam; callers in this file release ub_sock->addr with
 * freeaddrinfo().
 * @param stype: socket type, SOCK_DGRAM or a stream type.
 * @param ifname: interface address text, NULL is logged as "default".
 * @param port: port number as a string.
 * @param hints: getaddrinfo hints; ai_socktype is modified.
 * @param noip6: set to 1 when the lookup fails with EAI_NONAME for an
 *	AF_INET6 hint. NOTE(review): presumably also set in the noproto
 *	branches, those assignments are not visible in this listing.
 * @param ub_sock: receives the address and family.
 * The remaining parameters are passed through to the socket creation
 * functions unchanged.
 * NOTE(review): return statements are not visible; callers compare the
 * result against -1.
 */
963 make_sock(int stype, const char* ifname, const char* port,
964 struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
965 int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
966 int use_systemd, int dscp, struct unbound_socket* ub_sock)
968 struct addrinfo *res = NULL;
969 int r, s, inuse, noproto;
970 hints->ai_socktype = stype;
972 if((r=getaddrinfo(ifname, port, hints, &res)) != 0 || !res) {
974 if(r == EAI_NONAME && hints->ai_family == AF_INET6){
975 *noip6 = 1; /* 'Host not found' for IP6 on winXP */
979 log_err("node %s:%s getaddrinfo: %s %s",
980 ifname?ifname:"default", port, gai_strerror(r),
982 r==EAI_SYSTEM?(char*)strerror(errno):""
989 if(stype == SOCK_DGRAM) {
990 verbose_print_addr(res);
991 s = create_udp_sock(res->ai_family, res->ai_socktype,
992 (struct sockaddr*)res->ai_addr, res->ai_addrlen,
993 v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
994 reuseport, transparent, freebind, use_systemd, dscp);
995 if(s == -1 && inuse) {
996 log_err("bind: address already in use");
997 } else if(s == -1 && noproto && hints->ai_family == AF_INET6){
1001 s = create_tcp_accept_sock(res, v6only, &noproto, reuseport,
1002 transparent, tcp_mss, nodelay, freebind, use_systemd,
1004 if(s == -1 && noproto && hints->ai_family == AF_INET6){
/* hand the getaddrinfo result and the family to the caller */
1009 ub_sock->addr = res;
1011 ub_sock->fam = hints->ai_family;
1016 /** make socket and first see if ifname contains port override info */
/*
 * Wrapper around make_sock() that understands the "address@port" form.
 * When ifname contains '@', the text before it is copied to newif and the
 * text after it to p, and make_sock() is called with those two instead of
 * ifname and port. Both parts are length-checked against their buffers
 * first; "ifname too long" or "portnumber too long" is logged when a part
 * does not fit. Without '@', ifname and port are passed on as given.
 * All other arguments are forwarded to make_sock() unchanged.
 * NOTE(review): the declarations of newif and p and the return after a
 * length error are not visible in this listing.
 */
1018 make_sock_port(int stype, const char* ifname, const char* port,
1019 struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
1020 int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
1021 int use_systemd, int dscp, struct unbound_socket* ub_sock)
1023 char* s = strchr(ifname, '@');
1025 /* override port with ifspec@port */
1028 if((size_t)(s-ifname) >= sizeof(newif)) {
1029 log_err("ifname too long: %s", ifname);
1033 if(strlen(s+1) >= sizeof(p)) {
1034 log_err("portnumber too long: %s", ifname);
/* copy the whole spec, then cut it off at the '@' position */
1038 (void)strlcpy(newif, ifname, sizeof(newif));
1039 newif[s-ifname] = 0;
1040 (void)strlcpy(p, s+1, sizeof(p));
1042 return make_sock(stype, newif, p, hints, v6only, noip6, rcv,
1043 snd, reuseport, transparent, tcp_mss, nodelay, freebind,
1044 use_systemd, dscp, ub_sock);
1046 return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd,
1047 reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd,
1052 * Add port to open ports list.
1053 * @param list: list head. changed.
1055 * @param ftype: type of the listening socket (enum listen_type).
1056 * @param ub_sock: socket with address.
1057 * @return false on failure. list is unchanged then.
/*
 * Allocate a new listen_port item with malloc() and fill it in: ftype is
 * stored in item->ftype and ub_sock in item->socket (the pointer is
 * stored, not copied).
 * NOTE(review): the allocation check, the storing of s and the linking
 * into *list are not visible in this listing.
 */
1060 port_insert(struct listen_port** list, int s, enum listen_type ftype, struct unbound_socket* ub_sock)
1062 struct listen_port* item = (struct listen_port*)malloc(
1063 sizeof(struct listen_port));
1068 item->ftype = ftype;
1069 item->socket = ub_sock;
1074 /** set fd to receive source address packet info */
/*
 * Enable delivery of packet info (ancillary data) on socket s.
 * The option used depends on the family and on what the platform defines:
 *   AF_INET6: IPV6_RECVPKTINFO, else IPV6_PKTINFO;
 *   AF_INET:  IP_PKTINFO, else IP_RECVDSTADDR (only when IP_SENDSRCADDR
 *             is defined as well).
 * When no suitable option exists an error is logged that advises to
 * disable interface-automatic or the address family in the config.
 * @param s: socket descriptor.
 * @param family: AF_INET6 or AF_INET.
 * NOTE(review): return statements are not visible in this listing; the
 * caller in ports_create_if() treats a zero result as failure.
 */
1076 set_recvpktinfo(int s, int family)
1078 #if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
1083 if(family == AF_INET6) {
1084 # ifdef IPV6_RECVPKTINFO
1085 if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
1086 (void*)&on, (socklen_t)sizeof(on)) < 0) {
1087 log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
1091 # elif defined(IPV6_PKTINFO)
1092 if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
1093 (void*)&on, (socklen_t)sizeof(on)) < 0) {
1094 log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
1099 log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please "
1100 "disable interface-automatic or do-ip6 in config");
1102 # endif /* defined IPV6_RECVPKTINFO */
1104 } else if(family == AF_INET) {
1106 if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
1107 (void*)&on, (socklen_t)sizeof(on)) < 0) {
1108 log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
1112 # elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
1113 if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
1114 (void*)&on, (socklen_t)sizeof(on)) < 0) {
1115 log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
1120 log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
1121 "interface-automatic or do-ip4 in config");
1123 # endif /* IP_PKTINFO */
1129 /** see if interface is ssl, its port number == the ssl port number */
/*
 * Decide whether an interface is served on an SSL/TLS port.
 * The effective port is the number after '@' in ifname when ifname
 * contains '@', otherwise it is `port`. That number (parsed with atoi) is
 * compared with ssl_port and then with every entry of tls_additional_port
 * (entries are also parsed with atoi).
 * @param ifname: interface spec, optionally "address@port".
 * @param port: default port as a string.
 * @param ssl_port: the ssl service port number.
 * @param tls_additional_port: list of extra ssl port numbers, may be NULL
 *	(the loop then does not run).
 * NOTE(review): the return statements are not visible in this listing;
 * presumably nonzero on a match and zero otherwise.
 */
1131 if_is_ssl(const char* ifname, const char* port, int ssl_port,
1132 struct config_strlist* tls_additional_port)
1134 struct config_strlist* s;
1135 char* p = strchr(ifname, '@');
1136 if(!p && atoi(port) == ssl_port)
1138 if(p && atoi(p+1) == ssl_port)
1140 for(s = tls_additional_port; s; s = s->next) {
1141 if(p && atoi(p+1) == atoi(s->str))
1143 if(!p && atoi(port) == atoi(s->str))
1150 * Helper for ports_open. Creates one interface (or NULL for default).
1151 * @param ifname: The interface ip address.
1152 * @param do_auto: use automatic interface detection.
1153 * If enabled, then ifname must be the wildcard name.
1154 * @param do_udp: if udp should be used.
1155 * @param do_tcp: if tcp should be used.
1156 * @param hints: for getaddrinfo. family and flags have to be set by caller.
1157 * @param port: Port number to use (as string).
1158 * @param list: list of open ports, appended to, changed to point to list head.
1159 * @param rcv: receive buffer size for UDP
1160 * @param snd: send buffer size for UDP
1161 * @param ssl_port: ssl service port number
1162 * @param tls_additional_port: list of additional ssl service port numbers.
1163 * @param https_port: DoH service port number
1164 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
1165 * set to false on exit if reuseport failed due to no kernel support.
1166 * @param transparent: set IP_TRANSPARENT socket option.
1167 * @param tcp_mss: maximum segment size of tcp socket. default if zero.
1168 * @param freebind: set IP_FREEBIND socket option.
1169 * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection
1170 * @param use_systemd: if true, fetch sockets from systemd.
1171 * @param dnscrypt_port: dnscrypt service port number
1172 * @param dscp: DSCP to use.
1173 * @return: returns false on error.
/*
 * Open the listening sockets for one interface and add them to list.
 * Three socket-creating sections are visible in this listing:
 *   - a UDP socket on which set_recvpktinfo() is enabled, inserted as
 *     listen_type_udpancil (or listen_type_udpancil_dnscrypt);
 *   - a regular UDP socket, inserted as listen_type_udp (or
 *     listen_type_udp_dnscrypt);
 *   - a stream socket, inserted with port_type listen_type_ssl,
 *     listen_type_http, listen_type_tcp_dnscrypt or listen_type_tcp.
 * Each section allocates a fresh unbound_socket with calloc(), creates the
 * socket through make_sock_port() and, on a failure path, releases
 * ub_sock->addr with freeaddrinfo().
 * is_dnscrypt is derived by comparing the effective port (after '@' in
 * ifname, else `port`) with dnscrypt_port, or is fixed at 0 in the other
 * build variant. nodelay is only true for an https interface with
 * http2_nodelay set.
 * NOTE(review): the conditions that select the sections (presumably
 * do_auto, do_udp and do_tcp), the calloc result checks, the closing of
 * sockets on error and the return statements are not visible here.
 */
1176 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
1177 struct addrinfo *hints, const char* port, struct listen_port** list,
1178 size_t rcv, size_t snd, int ssl_port,
1179 struct config_strlist* tls_additional_port, int https_port,
1180 int* reuseport, int transparent, int tcp_mss, int freebind,
1181 int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp)
1184 int is_https = if_is_https(ifname, port, https_port);
1185 int nodelay = is_https && http2_nodelay;
1186 struct unbound_socket* ub_sock;
1188 int is_dnscrypt = ((strchr(ifname, '@') &&
1189 atoi(strchr(ifname, '@')+1) == dnscrypt_port) ||
1190 (!strchr(ifname, '@') && atoi(port) == dnscrypt_port));
1192 int is_dnscrypt = 0;
1193 (void)dnscrypt_port;
1196 if(!do_udp && !do_tcp)
1200 ub_sock = calloc(1, sizeof(struct unbound_socket));
1203 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1204 &noip6, rcv, snd, reuseport, transparent,
1205 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
1206 freeaddrinfo(ub_sock->addr);
1209 log_warn("IPv6 protocol not available");
1214 /* getting source addr packet info is highly non-portable */
1215 if(!set_recvpktinfo(s, hints->ai_family)) {
1217 freeaddrinfo(ub_sock->addr);
1221 if(!port_insert(list, s,
1222 is_dnscrypt?listen_type_udpancil_dnscrypt:listen_type_udpancil, ub_sock)) {
1224 freeaddrinfo(ub_sock->addr);
1229 ub_sock = calloc(1, sizeof(struct unbound_socket));
1232 /* regular udp socket */
1233 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1234 &noip6, rcv, snd, reuseport, transparent,
1235 tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
1236 freeaddrinfo(ub_sock->addr);
1239 log_warn("IPv6 protocol not available");
1244 if(!port_insert(list, s,
1245 is_dnscrypt?listen_type_udp_dnscrypt:listen_type_udp, ub_sock)) {
1247 freeaddrinfo(ub_sock->addr);
1253 int is_ssl = if_is_ssl(ifname, port, ssl_port,
1254 tls_additional_port);
1255 enum listen_type port_type;
1256 ub_sock = calloc(1, sizeof(struct unbound_socket));
1260 port_type = listen_type_ssl;
1262 port_type = listen_type_http;
1263 else if(is_dnscrypt)
1264 port_type = listen_type_tcp_dnscrypt;
1266 port_type = listen_type_tcp;
/* stream sockets get no UDP buffer sizes: rcv and snd are passed as 0 */
1267 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
1268 &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
1269 freebind, use_systemd, dscp, ub_sock)) == -1) {
1270 freeaddrinfo(ub_sock->addr);
1273 /*log_warn("IPv6 protocol not available");*/
1279 verbose(VERB_ALGO, "setup TCP for SSL service");
1280 if(!port_insert(list, s, port_type, ub_sock)) {
1282 freeaddrinfo(ub_sock->addr);
1291 * Add items to commpoint list in front.
1292 * @param c: commpoint to add.
1293 * @param front: listen struct.
1294 * @return: false on failure.
/*
 * Allocate a listen_list item with malloc() and link it in front of the
 * existing list: the new item's next pointer is set to front->cps.
 * NOTE(review): the allocation check, the storing of c in the item, the
 * update of front->cps and the return statements are not visible in this
 * listing.
 */
1297 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
1299 struct listen_list* item = (struct listen_list*)malloc(
1300 sizeof(struct listen_list));
1304 item->next = front->cps;
/**
 * listen_create: build the listening front end for a list of opened ports.
 *
 * Visible steps:
 *  - malloc a struct listen_dnsport and create its udp_buff of bufsize
 *    bytes with sldns_buffer_new;
 *  - initialise the stream-wait and the two http2 buffer-count locks, each
 *    only when its own inited flag is still clear;
 *  - for every entry in ports, create a comm point chosen by ports->ftype
 *    (udp, tcp, ssl/http, or udp with ancillary data) and add it with
 *    listen_cp_insert;
 *  - dnscrypt port types additionally get a dnscrypt_buffer of bufsize.
 * When a comm point, a dnscrypt buffer or the list insertion fails, the
 * error is logged and listen_delete(front) is called.
 * NOTE(review): return statements and several conditions are missing from
 * this listing; the exact failure return value cannot be read off here.
 *
 * @param base: comm base handed to every comm_point_create_* call.
 * @param ports: list of opened ports (fd, ftype, socket), walked via next.
 * @param bufsize: size for udp_buff, dnscrypt buffers and the tcp points.
 * @param http_notls: consulted for listen_type_http ports only.
 * @param sslctx: a missing context on an http port without http_notls
 *	produces a warning.
 * @param cb, cb_arg: callback and argument passed to each comm point.
 */
1309 struct listen_dnsport*
1310 listen_create(struct comm_base* base, struct listen_port* ports,
1311 size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
1312 int harden_large_queries, uint32_t http_max_streams,
1313 char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
1314 void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb,
1317 struct listen_dnsport* front = (struct listen_dnsport*)
1318 malloc(sizeof(struct listen_dnsport));
/* this one buffer is passed to every comm point created below */
1322 front->udp_buff = sldns_buffer_new(bufsize);
1324 front->dnscrypt_udp_buff = NULL;
1326 if(!front->udp_buff) {
/* the locks are file-level state; the flags keep them from being
 * initialised a second time */
1330 if(!stream_wait_lock_inited) {
1331 lock_basic_init(&stream_wait_count_lock);
1332 stream_wait_lock_inited = 1;
1334 if(!http2_query_buffer_lock_inited) {
1335 lock_basic_init(&http2_query_buffer_count_lock);
1336 http2_query_buffer_lock_inited = 1;
1338 if(!http2_response_buffer_lock_inited) {
1339 lock_basic_init(&http2_response_buffer_count_lock);
1340 http2_response_buffer_lock_inited = 1;
1343 /* create comm points as needed */
1345 struct comm_point* cp = NULL;
1346 if(ports->ftype == listen_type_udp ||
1347 ports->ftype == listen_type_udp_dnscrypt)
1348 cp = comm_point_create_udp(base, ports->fd,
1349 front->udp_buff, cb, cb_arg, ports->socket);
1350 else if(ports->ftype == listen_type_tcp ||
1351 ports->ftype == listen_type_tcp_dnscrypt)
/* plain tcp: the http stream limit and endpoint are passed as 0 and NULL */
1352 cp = comm_point_create_tcp(base, ports->fd,
1353 tcp_accept_count, tcp_idle_timeout,
1354 harden_large_queries, 0, NULL,
1355 tcp_conn_limit, bufsize, front->udp_buff,
1356 ports->ftype, cb, cb_arg, ports->socket);
1357 else if(ports->ftype == listen_type_ssl ||
1358 ports->ftype == listen_type_http) {
1359 cp = comm_point_create_tcp(base, ports->fd,
1360 tcp_accept_count, tcp_idle_timeout,
1361 harden_large_queries,
1362 http_max_streams, http_endpoint,
1363 tcp_conn_limit, bufsize, front->udp_buff,
1364 ports->ftype, cb, cb_arg, ports->socket);
1365 if(http_notls && ports->ftype == listen_type_http)
/* configuration and build-time warnings that apply to http ports only */
1369 if(ports->ftype == listen_type_http) {
1370 if(!sslctx && !http_notls) {
1371 log_warn("HTTPS port configured, but no TLS "
1372 "tls-service-key or tls-service-pem "
1375 #ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
1377 log_warn("Unbound is not compiled with an "
1378 "OpenSSL version supporting ALPN "
1379 " (OpenSSL >= 1.0.2). This is required "
1380 "to use DNS-over-HTTPS");
1382 #ifndef HAVE_NGHTTP2_NGHTTP2_H
1383 log_warn("Unbound is not compiled with "
1384 "nghttp2. This is required to use "
1388 } else if(ports->ftype == listen_type_udpancil ||
1389 ports->ftype == listen_type_udpancil_dnscrypt)
1390 cp = comm_point_create_udp_ancil(base, ports->fd,
1391 front->udp_buff, cb, cb_arg, ports->socket);
1393 log_err("can't create commpoint");
1394 listen_delete(front);
/* NOTE(review): presumably the fd stays owned by the ports list, which
 * listening_ports_free closes; confirm */
1398 cp->do_not_close = 1;
1400 if (ports->ftype == listen_type_udp_dnscrypt ||
1401 ports->ftype == listen_type_tcp_dnscrypt ||
1402 ports->ftype == listen_type_udpancil_dnscrypt) {
1404 cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
1405 if(!cp->dnscrypt_buffer) {
1406 log_err("can't alloc dnscrypt_buffer");
1407 comm_point_delete(cp);
1408 listen_delete(front);
/* keep a reference on front; listen_delete frees this buffer */
1411 front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
1414 if(!listen_cp_insert(cp, front)) {
1415 log_err("malloc failed");
1416 comm_point_delete(cp);
1417 listen_delete(front);
1420 ports = ports->next;
1423 log_err("Could not open sockets to accept queries.");
1424 listen_delete(front);
/**
 * listen_list_delete: release a list of comm points; each node's com is
 * handed to comm_point_delete.
 * NOTE(review): the loop and the release of the node itself are not
 * visible in this listing (pn presumably holds the next pointer); confirm.
 * @param list: head of the list to delete.
 */
1432 listen_list_delete(struct listen_list* list)
1434 struct listen_list *p = list, *pn;
1437 comm_point_delete(p->com);
/**
 * listen_delete: tear down a listen_dnsport.
 * Deletes the comm point list (listen_list_delete), frees the dnscrypt UDP
 * buffer when one is set and it is not the same object as udp_buff, frees
 * udp_buff, and destroys the three file-level locks whose inited flags are
 * set. Each flag is cleared before its lock is destroyed.
 * NOTE(review): a guard for front == NULL and the release of front itself
 * are not visible in this listing; confirm.
 * @param front: structure to delete.
 */
1444 listen_delete(struct listen_dnsport* front)
1448 listen_list_delete(front->cps);
/* skip the free when both pointers name the same buffer, so it is
 * freed only once (udp_buff is freed below) */
1450 if(front->dnscrypt_udp_buff &&
1451 front->udp_buff != front->dnscrypt_udp_buff) {
1452 sldns_buffer_free(front->dnscrypt_udp_buff);
1455 sldns_buffer_free(front->udp_buff);
1457 if(stream_wait_lock_inited) {
1458 stream_wait_lock_inited = 0;
1459 lock_basic_destroy(&stream_wait_count_lock);
1461 if(http2_query_buffer_lock_inited) {
1462 http2_query_buffer_lock_inited = 0;
1463 lock_basic_destroy(&http2_query_buffer_count_lock);
1465 if(http2_response_buffer_lock_inited) {
1466 http2_response_buffer_lock_inited = 0;
1467 lock_basic_destroy(&http2_response_buffer_count_lock);
1471 #ifdef HAVE_GETIFADDRS
/**
 * resolve_ifa_name: expand one configured interface string into addresses.
 *
 * Walks the ifas list. An entry is considered when its ifa_name equals
 * search_ifa, or, if search_ifa contains an '@', equals the part before
 * the last '@'. For AF_INET and AF_INET6 entries the address is rendered
 * with inet_ntop; for IPv6 a "%name" suffix is added when if_indextoname
 * yields a name for sin6_scope_id. The resulting text is strdup'ed and
 * appended to *ip_addresses, which is grown by one slot with realloc, and
 * *ip_addresses_size is incremented.
 * If nothing was appended (the size still equals its value on entry),
 * search_ifa itself is appended, so plain IP addresses pass through.
 *
 * NOTE(review): the return statements and some format arguments are
 * missing from this listing; the callers visible here test the result
 * with '!' to detect failure.
 *
 * @param ifas: interface list to search.
 * @param search_ifa: configured interface name or address, optional @port.
 * @param ip_addresses: in/out array of allocated strings.
 * @param ip_addresses_size: in/out number of entries in the array.
 */
1473 resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size)
1475 struct ifaddrs *ifa;
/* remembered so the code after the loop can tell if anything was added */
1477 int last_ip_addresses_size = *ip_addresses_size;
1479 for(ifa = ifas; ifa != NULL; ifa = ifa->ifa_next) {
1482 #ifdef INET6 /* | address ip | % | ifa name | @ | port | nul */
1483 char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1];
1485 char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1];
/* with an '@' present only the text before it is compared, and the
 * lengths must match so that a prefix of ifa_name is not accepted */
1488 if((atsign=strrchr(search_ifa, '@')) != NULL) {
1489 if(strlen(ifa->ifa_name) != (size_t)(atsign-search_ifa)
1490 || strncmp(ifa->ifa_name, search_ifa,
1491 atsign-search_ifa) != 0)
1494 if(strcmp(ifa->ifa_name, search_ifa) != 0)
1499 if(ifa->ifa_addr == NULL)
1502 family = ifa->ifa_addr->sa_family;
1503 if(family == AF_INET) {
1504 char a4[INET_ADDRSTRLEN + 1];
1505 struct sockaddr_in *in4 = (struct sockaddr_in *)
1507 if(!inet_ntop(family, &in4->sin_addr, a4, sizeof(a4))) {
1508 log_err("inet_ntop failed");
1511 snprintf(addr_buf, sizeof(addr_buf), "%s%s",
1515 else if(family == AF_INET6) {
1516 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)
1518 char a6[INET6_ADDRSTRLEN + 1];
1519 char if_index_name[IF_NAMESIZE + 1];
/* start empty: the if_indextoname result is ignored and the strlen
 * test below decides whether a scope name was obtained */
1520 if_index_name[0] = 0;
1521 if(!inet_ntop(family, &in6->sin6_addr, a6, sizeof(a6))) {
1522 log_err("inet_ntop failed");
1525 (void)if_indextoname(in6->sin6_scope_id,
1526 (char *)if_index_name);
1527 if (strlen(if_index_name) != 0) {
1528 snprintf(addr_buf, sizeof(addr_buf),
1529 "%s%%%s%s", a6, if_index_name, atsign);
1531 snprintf(addr_buf, sizeof(addr_buf), "%s%s",
1539 verbose(4, "interface %s has address %s", search_ifa, addr_buf);
/* grow through a temporary so the old array stays valid on failure */
1541 tmpbuf = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1));
1543 log_err("realloc failed: out of memory");
1546 *ip_addresses = tmpbuf;
1548 (*ip_addresses)[*ip_addresses_size] = strdup(addr_buf);
1549 if(!(*ip_addresses)[*ip_addresses_size]) {
1550 log_err("strdup failed: out of memory");
1553 (*ip_addresses_size)++;
/* no interface matched: keep the configured string itself */
1556 if (*ip_addresses_size == last_ip_addresses_size) {
1557 tmpbuf = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1));
1559 log_err("realloc failed: out of memory");
1562 *ip_addresses = tmpbuf;
1564 (*ip_addresses)[*ip_addresses_size] = strdup(search_ifa);
1565 if(!(*ip_addresses)[*ip_addresses_size]) {
1566 log_err("strdup failed: out of memory");
1569 (*ip_addresses_size)++;
1573 #endif /* HAVE_GETIFADDRS */
/**
 * resolve_interface_names: turn configured interface strings into a newly
 * allocated string array.
 *
 * Input comes from both the ifs array (num_ifs entries) and the list
 * linked list; output is stored in *resif with its length in *num_resif.
 *
 * With HAVE_GETIFADDRS the system interfaces are listed with getifaddrs
 * and every input string is expanded by resolve_ifa_name; when that fails
 * the partial result is released with config_del_strarray.
 * Without it the strings are copied: *num_resif starts at num_ifs, the
 * array is calloc'ed and every entry is strdup'ed; a strdup failure logs
 * "out of memory" and releases the array with config_del_strarray.
 * Both variants test for num_ifs == 0 with list == NULL first.
 *
 * NOTE(review): return statements, the body of the empty-input branch,
 * the body of the counting loop over list and the freeing of addrs are
 * missing from this listing; confirm.
 */
1575 int resolve_interface_names(char** ifs, int num_ifs,
1576 struct config_strlist* list, char*** resif, int* num_resif)
1578 #ifdef HAVE_GETIFADDRS
1579 struct ifaddrs *addrs = NULL;
1580 if(num_ifs == 0 && list == NULL) {
1585 if(getifaddrs(&addrs) == -1) {
1586 log_err("failed to list interfaces: getifaddrs: %s",
1593 for(i=0; i<num_ifs; i++) {
1594 if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) {
1596 config_del_strarray(*resif, *num_resif);
1604 struct config_strlist* p;
1605 for(p = list; p; p = p->next) {
1606 if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) {
1608 config_del_strarray(*resif, *num_resif);
1618 struct config_strlist* p;
1619 if(num_ifs == 0 && list == NULL) {
/* NOTE(review): the loop below presumably adds the length of list */
1624 *num_resif = num_ifs;
1625 for(p = list; p; p = p->next) {
1628 *resif = calloc(*num_resif, sizeof(**resif));
1630 log_err("out of memory");
1635 for(i=0; i<num_ifs; i++) {
1636 (*resif)[i] = strdup(ifs[i]);
1637 if(!((*resif)[i])) {
1638 log_err("out of memory");
1639 config_del_strarray(*resif, *num_resif);
1648 for(p = list; p; p = p->next) {
1649 (*resif)[idx] = strdup(p->str);
1650 if(!((*resif)[idx])) {
1651 log_err("out of memory");
1652 config_del_strarray(*resif, *num_resif);
1661 #endif /* HAVE_GETIFADDRS */
/**
 * listening_ports_open: open the listening sockets described by cfg.
 *
 * The port number is formatted from cfg->port and the addrinfo hints are
 * set to AI_PASSIVE plus AI_NUMERICHOST, so no name lookups happen.
 * Two modes are visible:
 *  - do_auto, or no interfaces given: ports_create_if is called for IPv6
 *    ("::0" when do_auto, otherwise "::1") and for IPv4 ("0.0.0.0" when
 *    do_auto, otherwise "127.0.0.1");
 *  - otherwise each ifs[i] is opened, with AF_INET6 when str_is_ip6
 *    reports an IPv6 string and AF_INET on the other visible branch.
 * Every ports_create_if failure releases the list built so far with
 * listening_ports_free.
 *
 * NOTE(review): the remaining parameters, the do_ip4/do_ip6 guards around
 * the individual calls and all return statements are missing from this
 * listing; confirm.
 */
1665 listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
1668 struct listen_port* list = NULL;
1669 struct addrinfo hints;
1670 int i, do_ip4, do_ip6;
1671 int do_tcp, do_auto;
1673 snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1674 do_ip4 = cfg->do_ip4;
1675 do_ip6 = cfg->do_ip6;
1676 do_tcp = cfg->do_tcp;
/* automatic interface mode is only used together with UDP */
1677 do_auto = cfg->if_automatic && cfg->do_udp;
1678 if(cfg->incoming_num_tcp == 0)
1682 memset(&hints, 0, sizeof(hints));
1683 hints.ai_flags = AI_PASSIVE;
1684 /* no name lookups on our listening ports */
1686 hints.ai_flags |= AI_NUMERICHOST;
1687 hints.ai_family = AF_UNSPEC;
1691 if(!do_ip4 && !do_ip6) {
1694 /* create ip4 and ip6 ports so that return addresses are nice. */
1695 if(do_auto || num_ifs == 0) {
1697 hints.ai_family = AF_INET6;
1698 if(!ports_create_if(do_auto?"::0":"::1",
1699 do_auto, cfg->do_udp, do_tcp,
1700 &hints, portbuf, &list,
1701 cfg->so_rcvbuf, cfg->so_sndbuf,
1702 cfg->ssl_port, cfg->tls_additional_port,
1703 cfg->https_port, reuseport, cfg->ip_transparent,
1704 cfg->tcp_mss, cfg->ip_freebind,
1705 cfg->http_nodelay, cfg->use_systemd,
1706 cfg->dnscrypt_port, cfg->ip_dscp)) {
1707 listening_ports_free(list);
1712 hints.ai_family = AF_INET;
1713 if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1714 do_auto, cfg->do_udp, do_tcp,
1715 &hints, portbuf, &list,
1716 cfg->so_rcvbuf, cfg->so_sndbuf,
1717 cfg->ssl_port, cfg->tls_additional_port,
1718 cfg->https_port, reuseport, cfg->ip_transparent,
1719 cfg->tcp_mss, cfg->ip_freebind,
1720 cfg->http_nodelay, cfg->use_systemd,
1721 cfg->dnscrypt_port, cfg->ip_dscp)) {
1722 listening_ports_free(list);
/* explicit interfaces: the second argument (do_auto above) is passed as 0 */
1726 } else for(i = 0; i<num_ifs; i++) {
1727 if(str_is_ip6(ifs[i])) {
1730 hints.ai_family = AF_INET6;
1731 if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1732 do_tcp, &hints, portbuf, &list,
1733 cfg->so_rcvbuf, cfg->so_sndbuf,
1734 cfg->ssl_port, cfg->tls_additional_port,
1735 cfg->https_port, reuseport, cfg->ip_transparent,
1736 cfg->tcp_mss, cfg->ip_freebind,
1737 cfg->http_nodelay, cfg->use_systemd,
1738 cfg->dnscrypt_port, cfg->ip_dscp)) {
1739 listening_ports_free(list);
1745 hints.ai_family = AF_INET;
1746 if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1747 do_tcp, &hints, portbuf, &list,
1748 cfg->so_rcvbuf, cfg->so_sndbuf,
1749 cfg->ssl_port, cfg->tls_additional_port,
1750 cfg->https_port, reuseport, cfg->ip_transparent,
1751 cfg->tcp_mss, cfg->ip_freebind,
1752 cfg->http_nodelay, cfg->use_systemd,
1753 cfg->dnscrypt_port, cfg->ip_dscp)) {
1754 listening_ports_free(list);
/**
 * listening_ports_free: release a list of listening ports.
 * For each entry the fd is closed with sock_close unless it is -1, and the
 * address info stored in the entry's socket is released with freeaddrinfo.
 * NOTE(review): the loop, the check that list->socket is set (suggested by
 * the rc_ports comment) and the freeing of the entries themselves are not
 * visible in this listing; confirm.
 * @param list: head of the port list.
 */
1763 void listening_ports_free(struct listen_port* list)
1765 struct listen_port* nx;
1768 if(list->fd != -1) {
1769 sock_close(list->fd);
1771 /* rc_ports don't have ub_socket */
1773 freeaddrinfo(list->socket->addr);
/**
 * listen_get_mem: estimate the memory used by a listen_dnsport.
 * Sums the sizes of the structure, its base, the udp_buff header and
 * capacity, the dnscrypt_udp_buff header (its capacity only when it is a
 * different object from udp_buff, so shared storage is counted once), and
 * comm_point_get_mem for every comm point in listen->cps.
 * NOTE(review): any per-node size added inside the loop and the condition
 * guarding the dnscrypt part are not visible in this listing.
 * @param listen: structure to measure.
 * @return presumably the accumulated size s; the return line is missing.
 */
1781 size_t listen_get_mem(struct listen_dnsport* listen)
1783 struct listen_list* p;
1784 size_t s = sizeof(*listen) + sizeof(*listen->base) +
1785 sizeof(*listen->udp_buff) +
1786 sldns_buffer_capacity(listen->udp_buff);
1788 s += sizeof(*listen->dnscrypt_udp_buff);
1789 if(listen->udp_buff != listen->dnscrypt_udp_buff){
1790 s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
1793 for(p = listen->cps; p; p = p->next) {
1795 s += comm_point_get_mem(p->com);
/**
 * listen_stop_accept: stop listening on the TCP accept comm points.
 * Walks listen->cps and calls comm_point_stop_listening on every comm
 * point of type comm_tcp_accept whose tcp_free list is not NULL.
 * @param listen: the listening structure.
 */
1800 void listen_stop_accept(struct listen_dnsport* listen)
1802 /* do not stop the ones that have no tcp_free list
1803 * (they have already stopped listening) */
1804 struct listen_list* p;
1805 for(p=listen->cps; p; p=p->next) {
1806 if(p->com->type == comm_tcp_accept &&
1807 p->com->tcp_free != NULL) {
1808 comm_point_stop_listening(p->com);
/**
 * listen_start_accept: resume listening on the TCP accept comm points.
 * Walks listen->cps and calls comm_point_start_listening(com, -1, -1) on
 * every comm point of type comm_tcp_accept whose tcp_free list is not
 * NULL; it is the counterpart of listen_stop_accept.
 * @param listen: the listening structure.
 */
1813 void listen_start_accept(struct listen_dnsport* listen)
1815 /* do not start the ones that have no tcp_free list, it is no
1816 * use to listen to them because they have no free tcp handlers */
1817 struct listen_list* p;
1818 for(p=listen->cps; p; p=p->next) {
1819 if(p->com->type == comm_tcp_accept &&
1820 p->com->tcp_free != NULL) {
1821 comm_point_start_listening(p->com, -1, -1);
/**
 * tcp_req_info_create: allocate a zeroed tcp_req_info.
 * The structure is malloc'ed, cleared with memset and its spool_buffer is
 * set to spoolbuf; the buffer is only referenced, not copied.
 * A malloc failure is logged.
 * NOTE(review): the return statements are missing from this listing;
 * presumably NULL on failure and req on success.
 * @param spoolbuf: buffer to reference as spool_buffer.
 */
1826 struct tcp_req_info*
1827 tcp_req_info_create(struct sldns_buffer* spoolbuf)
1829 struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
1831 log_err("malloc failure for new stream outoforder processing structure");
1834 memset(req, 0, sizeof(*req));
1835 req->spool_buffer = spoolbuf;
/**
 * tcp_req_info_delete: dispose of a tcp_req_info.
 * The pending lists are emptied with tcp_req_info_clear. According to the
 * comments below, neither cp nor spool_buffer is owned by this structure,
 * so neither is released here.
 * NOTE(review): a NULL guard and the free of req itself are not visible in
 * this listing; confirm.
 * @param req: structure to delete.
 */
1840 tcp_req_info_delete(struct tcp_req_info* req)
1843 tcp_req_info_clear(req);
1844 /* cp is pointer back to commpoint that owns this struct and
1845 * called delete on us */
1846 /* spool_buffer is shared udp buffer, not deleted here */
/**
 * tcp_req_info_clear: drop all pending work of a stream.
 * For the open requests, the reply entry is removed from each mesh state
 * with mesh_state_remove_reply, then the list head and counter are reset.
 * For the finished results waiting to be written, each item's size is
 * subtracted from the global stream_wait_count under its lock, then the
 * list head and counter are reset. read_is_closed is cleared as well.
 * NOTE(review): the loops and the frees of the list nodes and result
 * buffers are not visible in this listing; confirm.
 * @param req: structure to clear.
 */
1850 void tcp_req_info_clear(struct tcp_req_info* req)
1852 struct tcp_req_open_item* open, *nopen;
1853 struct tcp_req_done_item* item, *nitem;
1856 /* free outstanding request mesh reply entries */
1857 open = req->open_req_list;
1860 mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
1864 req->open_req_list = NULL;
1865 req->num_open_req = 0;
1867 /* free pending writable result packets */
1868 item = req->done_req_list;
/* the global byte counter is shared, hence the lock around the update */
1871 lock_basic_lock(&stream_wait_count_lock);
1872 stream_wait_count -= (sizeof(struct tcp_req_done_item)
1874 lock_basic_unlock(&stream_wait_count_lock);
1879 req->done_req_list = NULL;
1880 req->num_done_req = 0;
1881 req->read_is_closed = 0;
/**
 * tcp_req_info_remove_mesh_state: unlink the open-request entry that
 * refers to mesh state m.
 * Returns immediately when req or m is NULL. Otherwise the open request
 * list is searched for the item whose mesh_state equals m; that item is
 * unlinked (through prev->next or the list head) and num_open_req is
 * decremented. The mesh reply entry itself is left to the caller.
 * NOTE(review): the loop, the free of the unlinked item and the exit from
 * the loop are not visible in this listing; confirm.
 * @param req: stream request info, may be NULL.
 * @param m: mesh state to look for, may be NULL.
 */
1885 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
1887 struct tcp_req_open_item* open, *prev = NULL;
1888 if(!req || !m) return;
1889 open = req->open_req_list;
1891 if(open->mesh_state == m) {
1892 struct tcp_req_open_item* next;
1893 if(prev) prev->next = open->next;
1894 else req->open_req_list = open->next;
1895 /* caller has to manage the mesh state reply entry */
1898 req->num_open_req --;
/**
 * tcp_req_info_setup_listen: register req->cp for the events it should
 * wait for next.
 * Nothing is changed while tcp_byte_count is nonzero, because a message is
 * then partly transferred. Three outcomes are visible:
 *  - write mode: tcp_is_reading is cleared and listening is restarted;
 *  - read mode: tcp_is_reading is set, listening is restarted and
 *    read_again is set so buffered TLS data is consumed without waiting
 *    for a new event;
 *  - neither: listening is restarted and comm_point_listen_for_rw is
 *    called with both flags 0.
 * Every restart uses adjusted_tcp_timeout(req->cp) as timeout.
 * NOTE(review): the variables that record the wanted direction and most of
 * the conditions selecting a branch are missing from this listing.
 */
1909 /** setup listening for read or write */
1911 tcp_req_info_setup_listen(struct tcp_req_info* req)
1916 if(req->cp->tcp_byte_count != 0) {
1917 /* cannot change, halfway through */
1921 if(!req->cp->tcp_is_reading)
1923 if(!req->read_is_closed)
1927 req->cp->tcp_is_reading = 0;
1928 comm_point_stop_listening(req->cp);
1929 comm_point_start_listening(req->cp, -1,
1930 adjusted_tcp_timeout(req->cp));
1932 req->cp->tcp_is_reading = 1;
1933 comm_point_stop_listening(req->cp);
1934 comm_point_start_listening(req->cp, -1,
1935 adjusted_tcp_timeout(req->cp));
1936 /* and also read it (from SSL stack buffers), so
1937 * no read event is expected since the remainder of
1938 * the TLS frame is sitting in the buffers. */
1939 req->read_again = 1;
1941 comm_point_stop_listening(req->cp);
1942 comm_point_start_listening(req->cp, -1,
1943 adjusted_tcp_timeout(req->cp));
1944 comm_point_listen_for_rw(req->cp, 0, 0);
/**
 * tcp_req_info_pop_done: take the head of the finished-results list.
 * Requires a non-empty list (asserted). The size of the item, its header
 * plus item->len payload bytes, is subtracted from the global
 * stream_wait_count under its lock; the list head moves to the next item
 * and num_done_req is decremented.
 * NOTE(review): the return line is missing; the visible caller uses the
 * returned item, so ownership presumably passes to the caller.
 * @param req: stream request info with at least one finished result.
 */
1948 /** remove first item from list of pending results */
1949 static struct tcp_req_done_item*
1950 tcp_req_info_pop_done(struct tcp_req_info* req)
1952 struct tcp_req_done_item* item;
1953 log_assert(req->num_done_req > 0 && req->done_req_list);
1954 item = req->done_req_list;
1955 lock_basic_lock(&stream_wait_count_lock);
1956 stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
1957 lock_basic_unlock(&stream_wait_count_lock);
1958 req->done_req_list = req->done_req_list->next;
1959 req->num_done_req --;
/**
 * tcp_req_info_start_write_buf: load a reply into the comm point buffer
 * and switch the comm point to writing.
 * The cp buffer is cleared, len bytes from buf are written into it and it
 * is flipped; then tcp_is_reading is set to 0.
 * NOTE(review): the remaining parameter line (len) and one statement
 * between the flip and the mode change are missing from this listing.
 * @param req: stream request info; req->cp->buffer is overwritten.
 * @param buf: reply bytes to send.
 */
1963 /** Send given buffer and setup to write */
1965 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
1968 sldns_buffer_clear(req->cp->buffer);
1969 sldns_buffer_write(req->cp->buffer, buf, len);
1970 sldns_buffer_flip(req->cp->buffer);
1972 req->cp->tcp_is_reading = 0; /* we are now writing */
/**
 * tcp_req_pickup_next_result: when a finished result is queued, pop it
 * with tcp_req_info_pop_done and start writing it with
 * tcp_req_info_start_write_buf. With an empty queue nothing visible
 * happens.
 * NOTE(review): the release of the popped item and its buffer is not
 * visible in this listing; confirm.
 * @param req: stream request info.
 */
1975 /** pick up the next result and start writing it to the channel */
1977 tcp_req_pickup_next_result(struct tcp_req_info* req)
1979 if(req->num_done_req > 0) {
1980 /* unlist the done item from the list of pending results */
1981 struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
1982 tcp_req_info_start_write_buf(req, item->buf, item->len);
/**
 * tcp_req_info_handle_read_close: react to the peer closing its side.
 * The byte counter of a possibly partial read is reset. If results are
 * still queued, the next one is picked up for writing and listening is
 * set up again. With neither open requests nor queued results the
 * connection is to be closed, as the comment below states. Otherwise
 * read_is_closed is set and the stream waits, with timeout, for
 * outstanding answers.
 * NOTE(review): the return statements that distinguish these cases are
 * missing from this listing; confirm the values.
 * @param req: stream request info.
 */
1988 /** the read channel has closed */
1990 tcp_req_info_handle_read_close(struct tcp_req_info* req)
1992 verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
1993 /* reset byte count for (potential) partial read */
1994 req->cp->tcp_byte_count = 0;
1995 /* if we still have results to write, pick up next and write it */
1996 if(req->num_done_req != 0) {
1997 tcp_req_pickup_next_result(req);
1998 tcp_req_info_setup_listen(req);
2001 /* if nothing to do, this closes the connection */
2002 if(req->num_open_req == 0 && req->num_done_req == 0)
2004 /* otherwise, we must be waiting for dns resolve, wait with timeout */
2005 req->read_is_closed = 1;
2006 tcp_req_info_setup_listen(req);
/**
 * tcp_req_info_handle_writedone: continue after a reply was fully written.
 * The cp buffer is cleared. When no results remain and the read side is
 * already closed, the connection is dropped via comm_point_drop_reply.
 * Otherwise the comm point goes back to reading, the next queued result
 * (if any) is picked up for writing, and listening is set up again.
 * NOTE(review): the return after the drop is missing from this listing.
 * @param req: stream request info.
 */
2011 tcp_req_info_handle_writedone(struct tcp_req_info* req)
2013 /* back to reading state, we finished this write event */
2014 sldns_buffer_clear(req->cp->buffer);
2015 if(req->num_done_req == 0 && req->read_is_closed) {
2016 /* no more to write and nothing to read, close it */
2017 comm_point_drop_reply(&req->cp->repinfo);
2020 req->cp->tcp_is_reading = 1;
2021 /* see if another result needs writing */
2022 tcp_req_pickup_next_result(req);
2024 /* see if there is more to write, if not stop_listening for writing */
2025 /* see if new requests are allowed, if so, start_listening
2027 tcp_req_info_setup_listen(req);
/**
 * tcp_req_info_handle_readdone: process a query that was read completely.
 * in_worker_handle is set around the call to the comm point callback, and
 * the spool buffer limit is set to 0 before the call. When the callback
 * returns nonzero, the answer is already in c->buffer: the comm point is
 * switched to writing and listening is restarted with the adjusted tcp
 * timeout. When it returns zero, the query is either waiting in the mesh
 * or the reply was dropped; afterwards the buffer is cleared, a queued
 * result is picked up if present, and listening is set up again.
 * NOTE(review): several conditions and return statements, including the
 * check for a dropped stream, are missing from this listing.
 * @param req: stream request info; req->cp holds the query.
 */
2031 tcp_req_info_handle_readdone(struct tcp_req_info* req)
2033 struct comm_point* c = req->cp;
2035 /* we want to read up several requests, unless there are
2036 * pending answers */
2040 req->in_worker_handle = 1;
2041 sldns_buffer_set_limit(req->spool_buffer, 0);
2042 /* handle the current request */
2043 /* this calls the worker handle request routine that could give
2044 * a cache response, or localdata response, or drop the reply,
2045 * or schedule a mesh entry for later */
2046 fptr_ok(fptr_whitelist_comm_point(c->callback));
2047 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
2048 req->in_worker_handle = 0;
2049 /* there is an answer, put it up. It is already in the
2050 * c->buffer, just send it. */
2051 /* since we were just reading a query, the channel is
2052 * clear to write to */
2054 c->tcp_is_reading = 0;
2055 comm_point_stop_listening(c);
2056 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
2059 req->in_worker_handle = 0;
2060 /* it should be waiting in the mesh for recursion.
2061 * If mesh failed to add a new entry and called commpoint_drop_reply.
2062 * Then the mesh state has been cleared. */
2064 /* the reply has been dropped, stream has been closed. */
2067 /* If mesh failed(mallocfail) and called commpoint_send_reply with
2068 * something like servfail then we pick up that reply below. */
2073 sldns_buffer_clear(c->buffer);
2074 /* if pending answers, pick up an answer and start sending it */
2075 tcp_req_pickup_next_result(req);
2077 /* if answers pending, start sending answers */
2078 /* read more requests if we can have more requests */
2079 tcp_req_info_setup_listen(req);
/**
 * tcp_req_info_add_meshstate: record that mesh state m is working on a
 * query of this stream.
 * A tcp_req_open_item is malloc'ed, linked at the head of
 * req->open_req_list with its mesh_state set to m, and num_open_req is
 * incremented. All three arguments must be non-NULL (asserted).
 * NOTE(review): the malloc-failure check, the store of mesh into the item
 * and the return statements are missing from this listing; confirm.
 */
2083 tcp_req_info_add_meshstate(struct tcp_req_info* req,
2084 struct mesh_area* mesh, struct mesh_state* m)
2086 struct tcp_req_open_item* item;
2087 log_assert(req && mesh && m);
2088 item = (struct tcp_req_open_item*)malloc(sizeof(*item));
2090 item->next = req->open_req_list;
2092 item->mesh_state = m;
2093 req->open_req_list = item;
2094 req->num_open_req++;
/**
 * tcp_req_info_add_result: queue a copy of a reply behind the results that
 * are already waiting to be written.
 * The space needed (item header plus len) is first reserved in the global
 * stream_wait_count under its lock; when that would exceed
 * stream_wait_max the reply is refused and a message is logged. Then the
 * tail of done_req_list is located, a new item is malloc'ed, buf is
 * duplicated with memdup, and the item is linked at the tail (or becomes
 * the head of an empty list). num_done_req is incremented.
 * NOTE(review): the failure branches (undoing the reservation, freeing the
 * item) and the return statements are missing from this listing; the
 * visible caller treats a zero result as failure.
 * @param req: stream request info.
 * @param buf: reply bytes; copied, so the caller keeps ownership.
 * @param len: number of bytes in buf.
 */
2098 /** Add a result to the result list. At the end. */
2100 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len)
2102 struct tcp_req_done_item* last = NULL;
2103 struct tcp_req_done_item* item;
2106 /* see if we have space */
2107 space = sizeof(struct tcp_req_done_item) + len;
2108 lock_basic_lock(&stream_wait_count_lock);
2109 if(stream_wait_count + space > stream_wait_max) {
2110 lock_basic_unlock(&stream_wait_count_lock);
2111 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size");
2114 stream_wait_count += space;
2115 lock_basic_unlock(&stream_wait_count_lock);
2117 /* find last element */
2118 last = req->done_req_list;
2119 while(last && last->next)
2122 /* create new element */
2123 item = (struct tcp_req_done_item*)malloc(sizeof(*item));
2125 log_err("malloc failure, for stream result list");
2130 item->buf = memdup(buf, len);
2133 log_err("malloc failure, adding reply to stream result list");
2138 if(last) last->next = item;
2139 else req->done_req_list = item;
2140 req->num_done_req++;
/**
 * tcp_req_info_send_reply: deliver the reply that sits in the spool
 * buffer. Three paths are visible:
 *  - called from inside the worker callback (in_worker_handle set): the
 *    spool buffer contents are copied into the cp buffer;
 *  - the comm point is idle in read mode (tcp_is_reading set and
 *    tcp_byte_count 0): the reply is written at once and the comm point is
 *    re-registered with the adjusted tcp timeout;
 *  - otherwise the reply is queued with tcp_req_info_add_result, and the
 *    connection is dropped when queueing fails.
 * NOTE(review): the return statements separating these paths are missing
 * from this listing; confirm.
 * @param req: stream request info; req->spool_buffer holds the reply.
 */
2145 tcp_req_info_send_reply(struct tcp_req_info* req)
2147 if(req->in_worker_handle) {
2148 /* reply from mesh is in the spool_buffer */
2149 /* copy now, so that the spool buffer is free for other tasks
2150 * before the callback is done */
2151 sldns_buffer_clear(req->cp->buffer);
2152 sldns_buffer_write(req->cp->buffer,
2153 sldns_buffer_begin(req->spool_buffer),
2154 sldns_buffer_limit(req->spool_buffer));
2155 sldns_buffer_flip(req->cp->buffer);
2159 /* now that the query has been handled, that mesh_reply entry
2160 * should be removed, from the tcp_req_info list,
2161 * the mesh state cleanup removes them with region_cleanup and
2162 * replies_sent true. */
2163 /* see if we can send it straight away (we are not doing
2164 * anything else). If so, copy to buffer and start */
2165 if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) {
2166 /* buffer is free, and was ready to read new query into,
2167 * but we are now going to use it to send this answer */
2168 tcp_req_info_start_write_buf(req,
2169 sldns_buffer_begin(req->spool_buffer),
2170 sldns_buffer_limit(req->spool_buffer));
2171 /* switch to listen to write events */
2172 comm_point_stop_listening(req->cp);
2173 comm_point_start_listening(req->cp, -1,
2174 adjusted_tcp_timeout(req->cp));
2177 /* queue up the answer behind the others already pending */
2178 if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer),
2179 sldns_buffer_limit(req->spool_buffer))) {
2180 /* drop the connection, we are out of resources */
2181 comm_point_drop_reply(&req->cp->repinfo);
/**
 * tcp_req_info_get_stream_buffer_size: read the global stream_wait_count.
 * When the lock has not been initialised the counter is returned without
 * locking; otherwise it is copied while holding stream_wait_count_lock.
 * @return the current value of stream_wait_count (the final return line is
 *	missing from this listing; presumably the copied value s).
 */
2185 size_t tcp_req_info_get_stream_buffer_size(void)
2188 if(!stream_wait_lock_inited)
2189 return stream_wait_count;
2190 lock_basic_lock(&stream_wait_count_lock);
2191 s = stream_wait_count;
2192 lock_basic_unlock(&stream_wait_count_lock);
/**
 * http2_get_query_buffer_size: read the global http2_query_buffer_count.
 * When the lock has not been initialised the counter is returned without
 * locking; otherwise it is copied while holding the count lock.
 * @return the current value of http2_query_buffer_count (the final return
 *	line is missing from this listing; presumably the copied value s).
 */
2196 size_t http2_get_query_buffer_size(void)
2199 if(!http2_query_buffer_lock_inited)
2200 return http2_query_buffer_count;
2201 lock_basic_lock(&http2_query_buffer_count_lock);
2202 s = http2_query_buffer_count;
2203 lock_basic_unlock(&http2_query_buffer_count_lock);
/**
 * http2_get_response_buffer_size: read the global
 * http2_response_buffer_count.
 * When the lock has not been initialised the counter is returned without
 * locking; otherwise it is copied while holding the count lock.
 * @return the current value of http2_response_buffer_count (the final
 *	return line is missing from this listing; presumably the copy s).
 */
2207 size_t http2_get_response_buffer_size(void)
2210 if(!http2_response_buffer_lock_inited)
2211 return http2_response_buffer_count;
2212 lock_basic_lock(&http2_response_buffer_count_lock);
2213 s = http2_response_buffer_count;
2214 lock_basic_unlock(&http2_response_buffer_count_lock);
/**
 * http2_submit_response_read_callback: data source read callback that
 * feeds a stored DNS response to nghttp2.
 * The session is taken from source->ptr and the stream from the nghttp2
 * stream user data for stream_id. At most length bytes, limited to what
 * remains in the stream's rbuffer and to SSIZE_MAX, are copied into buf
 * and skipped in rbuffer. Once rbuffer is drained, the EOF data flag is
 * set, the buffer capacity is subtracted from the global
 * http2_response_buffer_count under its lock, and rbuffer is freed and
 * set to NULL.
 * Returns NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE when the stream data
 * cannot be found or rbuffer holds no data.
 * NOTE(review): the success return line is missing from this listing;
 * presumably the number of bytes copied.
 */
2219 /** nghttp2 callback. Used to copy response from rbuffer to nghttp2 session */
2220 static ssize_t http2_submit_response_read_callback(
2221 nghttp2_session* ATTR_UNUSED(session),
2222 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2223 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2225 struct http2_stream* h2_stream;
2226 struct http2_session* h2_session = source->ptr;
2227 size_t copylen = length;
2228 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2229 h2_session->session, stream_id))) {
2230 verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2232 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2234 if(!h2_stream->rbuffer ||
2235 sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2236 verbose(VERB_QUERY, "http2: cannot submit buffer. No data "
2237 "available in rbuffer");
2238 /* rbuffer will be free'd in frame close cb */
2239 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
/* clamp to the bytes left; the SSIZE_MAX clamp matches the ssize_t
 * return type of this callback */
2242 if(copylen > sldns_buffer_remaining(h2_stream->rbuffer))
2243 copylen = sldns_buffer_remaining(h2_stream->rbuffer);
2244 if(copylen > SSIZE_MAX)
2245 copylen = SSIZE_MAX; /* will probably never happen */
2247 memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen);
2248 sldns_buffer_skip(h2_stream->rbuffer, copylen);
2250 if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2251 *data_flags |= NGHTTP2_DATA_FLAG_EOF;
2252 lock_basic_lock(&http2_response_buffer_count_lock);
2253 http2_response_buffer_count -=
2254 sldns_buffer_capacity(h2_stream->rbuffer);
2255 lock_basic_unlock(&http2_response_buffer_count_lock);
2256 sldns_buffer_free(h2_stream->rbuffer);
2257 h2_stream->rbuffer = NULL;
/**
 * http2_submit_rst_stream: queue an RST_STREAM frame carrying
 * NGHTTP2_INTERNAL_ERROR for h2_stream->stream_id on the nghttp2 session.
 * A failure of nghttp2_submit_rst_stream is logged together with
 * nghttp2_strerror(ret).
 * NOTE(review): the test of ret and the return statements are missing
 * from this listing; the lines below describe 0 as error, 1 otherwise.
 */
2264 * Send RST_STREAM frame for stream.
2265 * @param h2_session: http2 session to submit frame to
2266 * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM
2267 * @return 0 on error, 1 otherwise
2269 static int http2_submit_rst_stream(struct http2_session* h2_session,
2270 struct http2_stream* h2_stream)
2272 int ret = nghttp2_submit_rst_stream(h2_session->session,
2273 NGHTTP2_FLAG_NONE, h2_stream->stream_id,
2274 NGHTTP2_INTERNAL_ERROR);
2276 verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, "
2277 "error: %s", nghttp2_strerror(ret));
/**
 * http2_submit_dns_response: hand the DNS answer in c->buffer to nghttp2.
 *
 * The stream is the one currently attached to the comm point
 * (h2_session->c->h2_stream). Visible steps:
 *  - refuse when the stream already has an rbuffer or c->buffer has no
 *    remaining data;
 *  - format the status as exactly three digits and the remaining length
 *    as the content-length text;
 *  - reserve rlen bytes in the global http2_response_buffer_count under
 *    its lock; when the limit http2_response_buffer_max would be exceeded
 *    the stream is reset with http2_submit_rst_stream instead;
 *  - allocate rbuffer (undoing the reservation on failure), copy the
 *    remaining bytes of c->buffer into it and flip it;
 *  - submit three headers (:status, content-type, content-length) with
 *    http2_submit_response_read_callback as the body source.
 * NOTE(review): declarations of status, rlen_str, rlen and ret, the error
 * returns and the handling after a failed nghttp2_submit_response are
 * missing from this listing; confirm.
 */
2284 * DNS response ready to be submitted to nghttp2, to be prepared for sending
2285 * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer
2286 * might be used before this will be sent out.
2287 * @param h2_session: http2 session, containing c->buffer which contains answer
2288 * @return 0 on error, 1 otherwise
2290 int http2_submit_dns_response(struct http2_session* h2_session)
2293 nghttp2_data_provider data_prd;
2295 nghttp2_nv headers[3];
2296 struct http2_stream* h2_stream = h2_session->c->h2_stream;
2300 if(h2_stream->rbuffer) {
2301 log_err("http2 submit response error: rbuffer already "
2305 if(sldns_buffer_remaining(h2_session->c->buffer) == 0) {
2306 log_err("http2 submit response error: c->buffer not complete");
/* snprintf must produce exactly three characters, matching the fixed
 * valuelen of 3 used for the :status header below */
2310 if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2311 verbose(VERB_QUERY, "http2: submit response error: "
2316 rlen = sldns_buffer_remaining(h2_session->c->buffer);
2317 snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen);
2319 lock_basic_lock(&http2_response_buffer_count_lock);
2320 if(http2_response_buffer_count + rlen > http2_response_buffer_max) {
2321 lock_basic_unlock(&http2_response_buffer_count_lock);
2322 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2323 "in https-response-buffer-size");
2324 return http2_submit_rst_stream(h2_session, h2_stream);
2326 http2_response_buffer_count += rlen;
2327 lock_basic_unlock(&http2_response_buffer_count_lock);
2329 if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) {
/* allocation failed: give the reserved bytes back */
2330 lock_basic_lock(&http2_response_buffer_count_lock);
2331 http2_response_buffer_count -= rlen;
2332 lock_basic_unlock(&http2_response_buffer_count_lock);
2333 log_err("http2 submit response error: malloc failure");
2337 headers[0].name = (uint8_t*)":status";
2338 headers[0].namelen = 7;
2339 headers[0].value = (uint8_t*)status;
2340 headers[0].valuelen = 3;
2341 headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2343 headers[1].name = (uint8_t*)"content-type";
2344 headers[1].namelen = 12;
2345 headers[1].value = (uint8_t*)"application/dns-message";
2346 headers[1].valuelen = 23;
2347 headers[1].flags = NGHTTP2_NV_FLAG_NONE;
2349 headers[2].name = (uint8_t*)"content-length";
2350 headers[2].namelen = 14;
2351 headers[2].value = (uint8_t*)rlen_str;
2352 headers[2].valuelen = strlen(rlen_str);
2353 headers[2].flags = NGHTTP2_NV_FLAG_NONE;
2355 sldns_buffer_write(h2_stream->rbuffer,
2356 sldns_buffer_current(h2_session->c->buffer),
2357 sldns_buffer_remaining(h2_session->c->buffer));
2358 sldns_buffer_flip(h2_stream->rbuffer);
2360 data_prd.source.ptr = h2_session;
2361 data_prd.read_callback = http2_submit_response_read_callback;
2362 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2363 headers, 3, &data_prd);
2365 verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2366 "error: %s", nghttp2_strerror(ret));
/**
 * Alternate definition of http2_submit_dns_response whose single void*
 * argument is marked unused.
 * NOTE(review): presumably the variant for builds without nghttp2; the
 * preprocessor guard and the body are missing from this listing.
 */
2372 int http2_submit_dns_response(void* ATTR_UNUSED(v))
/**
 * http_status_to_str: map an http_status value to a constant reason
 * phrase. The strings returned are literals, so the caller must not free
 * or modify them. "Status Unknown" is the last return in the function.
 * NOTE(review): the switch statement, the returns for HTTP_STATUS_OK and
 * HTTP_STATUS_NOT_FOUND and the default label are missing from this
 * listing; confirm.
 * @param s: status code to describe.
 */
2379 /** HTTP status to descriptive string */
2380 static char* http_status_to_str(enum http_status s)
2383 case HTTP_STATUS_OK:
2385 case HTTP_STATUS_BAD_REQUEST:
2386 return "Bad Request";
2387 case HTTP_STATUS_NOT_FOUND:
2389 case HTTP_STATUS_PAYLOAD_TOO_LARGE:
2390 return "Payload Too Large";
2391 case HTTP_STATUS_URI_TOO_LONG:
2392 return "URI Too Long";
2393 case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE:
2394 return "Unsupported Media Type";
2395 case HTTP_STATUS_NOT_IMPLEMENTED:
2396 return "Not Implemented";
2398 return "Status Unknown";
/**
 * http2_submit_error_read_callback: data source read callback that
 * supplies the body of an HTTP error response.
 * The stream is looked up from the nghttp2 stream user data; when it is
 * missing NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE is returned. The EOF flag
 * is always set, so the body is sent in a single call. The text comes
 * from http_status_to_str(h2_stream->status); when it does not fit in
 * length bytes an empty body is produced (return 0), otherwise the text
 * is copied without a terminating NUL.
 * NOTE(review): the declaration of msg and the final return are missing
 * from this listing; presumably strlen(msg) is returned.
 */
2401 /** nghttp2 callback. Used to copy error message to nghttp2 session */
2402 static ssize_t http2_submit_error_read_callback(
2403 nghttp2_session* ATTR_UNUSED(session),
2404 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2405 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2407 struct http2_stream* h2_stream;
2408 struct http2_session* h2_session = source->ptr;
2410 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2411 h2_session->session, stream_id))) {
2412 verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2414 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2416 *data_flags |= NGHTTP2_DATA_FLAG_EOF;
2417 msg = http_status_to_str(h2_stream->status);
2418 if(length < strlen(msg))
2419 return 0; /* not worth trying over multiple frames */
2420 memcpy(buf, msg, strlen(msg));
/**
 * http2_submit_error: submit an HTTP error response for a stream.
 * The status in h2_stream->status is formatted as exactly three digits
 * and sent as the only header (:status); the body is produced by
 * http2_submit_error_read_callback. A failure of nghttp2_submit_response
 * is logged together with nghttp2_strerror(ret).
 * NOTE(review): declarations of status and ret, the test of ret and the
 * return statements are missing from this listing; the lines below
 * describe 0 as error, 1 otherwise.
 */
2426 * HTTP error response ready to be submitted to nghttp2, to be prepared for
2427 * sending out. Message body will contain descriptive string for HTTP status.
2428 * @param h2_session: http2 session to submit to
2429 * @param h2_stream: http2 stream containing HTTP status to use for error
2430 * @return 0 on error, 1 otherwise
2432 static int http2_submit_error(struct http2_session* h2_session,
2433 struct http2_stream* h2_stream)
2437 nghttp2_data_provider data_prd;
2438 nghttp2_nv headers[1]; /* will be copied by nghttp */
/* exactly three characters are required, matching valuelen 3 below */
2439 if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2440 verbose(VERB_QUERY, "http2: submit error failed, "
2444 headers[0].name = (uint8_t*)":status";
2445 headers[0].namelen = 7;
2446 headers[0].value = (uint8_t*)status;
2447 headers[0].valuelen = 3;
2448 headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2450 data_prd.source.ptr = h2_session;
2451 data_prd.read_callback = http2_submit_error_read_callback;
2453 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2454 headers, 1, &data_prd);
2456 verbose(VERB_QUERY, "http2: submit error failed, "
2457 "error: %s", nghttp2_strerror(ret));
/**
 * http2_query_read_done: move a completely received query from the
 * stream's qbuffer into the shared c->buffer and run the comm point
 * callback on it.
 * Visible steps:
 *  - fail when another stream is already attached to the comm point, or
 *    when the query does not fit in the cleared c->buffer;
 *  - copy the query, subtract the qbuffer capacity from the global
 *    http2_query_buffer_count under its lock, free qbuffer and set it to
 *    NULL;
 *  - flip c->buffer, attach the stream to the comm point and invoke the
 *    callback with NETEVENT_NOERROR;
 *  - a nonzero callback result returns 1 with the answer in c->buffer and
 *    the stream still attached; otherwise the buffer is cleared, the
 *    stream is detached and 0 is returned.
 * NOTE(review): the returns on the two failure paths are missing from this
 * listing; the lines below give -1 as the error value.
 */
2464 * Start query handling. Query is stored in the stream, and will be free'd here.
2465 * @param h2_session: http2 session, containing comm point
2466 * @param h2_stream: stream containing buffered query
2467 * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no
2468 * reply available (yet).
2470 static int http2_query_read_done(struct http2_session* h2_session,
2471 struct http2_stream* h2_stream)
2473 log_assert(h2_stream->qbuffer);
2475 if(h2_session->c->h2_stream) {
2476 verbose(VERB_ALGO, "http2_query_read_done failure: shared "
2477 "buffer already assigned to stream");
2481 /* the c->buffer might be used by mesh_send_reply and not be cleared,
2482 * so it needs to be cleared before use */
2483 sldns_buffer_clear(h2_session->c->buffer);
2484 if(sldns_buffer_remaining(h2_session->c->buffer) <
2485 sldns_buffer_remaining(h2_stream->qbuffer)) {
2486 /* qbuffer will be free'd in frame close cb */
2487 sldns_buffer_clear(h2_session->c->buffer);
2488 verbose(VERB_ALGO, "http2_query_read_done failure: can't fit "
2489 "qbuffer in c->buffer");
2493 sldns_buffer_write(h2_session->c->buffer,
2494 sldns_buffer_current(h2_stream->qbuffer),
2495 sldns_buffer_remaining(h2_stream->qbuffer));
2497 lock_basic_lock(&http2_query_buffer_count_lock);
2498 http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer);
2499 lock_basic_unlock(&http2_query_buffer_count_lock);
2500 sldns_buffer_free(h2_stream->qbuffer);
2501 h2_stream->qbuffer = NULL;
2503 sldns_buffer_flip(h2_session->c->buffer);
2504 h2_session->c->h2_stream = h2_stream;
2505 fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback));
2506 if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg,
2507 NETEVENT_NOERROR, &h2_session->c->repinfo)) {
2508 return 1; /* answer in c->buffer */
2510 sldns_buffer_clear(h2_session->c->buffer);
2511 h2_session->c->h2_stream = NULL;
2512 return 0; /* mesh state added, or dropped */
2515 /** nghttp2 callback. Used to check if the received frame indicates the end of a
2516 * stream. Gather collected request data and start query handling.
 *
 * The stream is looked up through the nghttp2 stream user data for
 * frame->hd.stream_id. Request flags collected on the stream are mapped to
 * an HTTP status: invalid_endpoint to HTTP_STATUS_NOT_FOUND,
 * invalid_content_type to HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE, a method other
 * than GET or POST to HTTP_STATUS_NOT_IMPLEMENTED, query_too_large to
 * HTTP_STATUS_PAYLOAD_TOO_LARGE or HTTP_STATUS_URI_TOO_LONG, and a missing
 * qbuffer to HTTP_STATUS_BAD_REQUEST. A stream with a status set is answered
 * with http2_submit_error(); otherwise the qbuffer is handed to
 * http2_query_read_done() and a response is submitted with
 * http2_submit_dns_response().
 *
 * @param session: nghttp2 session the frame was received on.
 * @param frame: the received frame.
 * @param cb_arg: the struct http2_session of this connection.
 * @return NGHTTP2_ERR_CALLBACK_FAILURE on the failure paths below.
 * NOTE(review): this listing has gaps (braces, the non-failure returns and
 * the submit_http_error label are not shown); the success value is
 * presumably 0 -- confirm against the complete file. */
2517 static int http2_req_frame_recv_cb(nghttp2_session* session,
2518 const nghttp2_frame* frame, void* cb_arg)
2520 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2521 struct http2_stream* h2_stream;
2522 int query_read_done;
/* matches frames that are neither DATA nor HEADERS, or that do not carry
 * the END_STREAM flag */
2524 if((frame->hd.type != NGHTTP2_DATA &&
2525 frame->hd.type != NGHTTP2_HEADERS) ||
2526 !(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) {
/* fetch the per-stream state stored as nghttp2 stream user data */
2530 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2531 session, frame->hd.stream_id)))
/* map the request flags collected on the stream to an HTTP error status */
2534 if(h2_stream->invalid_endpoint) {
2535 h2_stream->status = HTTP_STATUS_NOT_FOUND;
2536 goto submit_http_error;
2539 if(h2_stream->invalid_content_type) {
2540 h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE;
2541 goto submit_http_error;
2544 if(h2_stream->http_method != HTTP_METHOD_GET &&
2545 h2_stream->http_method != HTTP_METHOD_POST) {
2546 h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED;
2547 goto submit_http_error;
2550 if(h2_stream->query_too_large) {
2551 if(h2_stream->http_method == HTTP_METHOD_POST)
2552 h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE;
/* NOTE(review): as listed this assignment would also run for POST;
 * presumably an else sits on a line missing from this listing -- confirm */
2554 h2_stream->status = HTTP_STATUS_URI_TOO_LONG;
2555 goto submit_http_error;
/* no query was buffered for this stream */
2558 if(!h2_stream->qbuffer) {
2559 h2_stream->status = HTTP_STATUS_BAD_REQUEST;
2560 goto submit_http_error;
2563 if(h2_stream->status) {
/* NOTE(review): the submit_http_error label targeted by the gotos above is
 * not visible in this listing; presumably it sits here -- confirm */
2565 verbose(VERB_QUERY, "http2 request invalid, returning :status="
2566 "%d", h2_stream->status);
2567 if(!http2_submit_error(h2_session, h2_stream)) {
2568 return NGHTTP2_ERR_CALLBACK_FAILURE;
/* no error status was set: accept the request and flip the qbuffer before
 * handing it to http2_query_read_done() */
2572 h2_stream->status = HTTP_STATUS_OK;
2574 sldns_buffer_flip(h2_stream->qbuffer);
/* postpone_drop is raised around the query callback; presumably a drop
 * requested from inside the callback is then only recorded in is_drop,
 * which is tested below -- confirm against the drop handling code.
 * NOTE(review): in the lines visible here postpone_drop is reset to 0 only
 * on the branch where query_read_done is zero -- confirm this is intended */
2575 h2_session->postpone_drop = 1;
2576 query_read_done = http2_query_read_done(h2_session, h2_stream);
2577 if(query_read_done < 0)
2578 return NGHTTP2_ERR_CALLBACK_FAILURE;
2579 else if(!query_read_done) {
2580 if(h2_session->is_drop) {
2581 /* connection needs to be closed. Return failure to make
2582 * sure no other actions are taken anymore on comm point.
2583 * failure will result in reclaiming (and closing)
2585 verbose(VERB_QUERY, "http2 query dropped in worker cb");
2586 h2_session->postpone_drop = 0;
2587 return NGHTTP2_ERR_CALLBACK_FAILURE;
2589 /* nothing to submit right now, query added to mesh. */
2590 h2_session->postpone_drop = 0;
/* presumably only reached when query_read_done is positive, which
 * http2_query_read_done() uses to signal an answer in c->buffer -- the
 * return that ends the branch above is not visible in this listing */
2593 if(!http2_submit_dns_response(h2_session)) {
2594 sldns_buffer_clear(h2_session->c->buffer);
2595 h2_session->c->h2_stream = NULL;
2596 return NGHTTP2_ERR_CALLBACK_FAILURE;
2598 verbose(VERB_QUERY, "http2 query submitted to session");
/* response submitted: clear c->buffer and detach the stream from the
 * comm point again */
2599 sldns_buffer_clear(h2_session->c->buffer);
2600 h2_session->c->h2_stream = NULL;
2604 /** nghttp2 callback. Used to detect start of new streams.
 *
 * For a HEADERS frame of category NGHTTP2_HCAT_REQUEST a new http2_stream is
 * created for frame->hd.stream_id, added to the http2 session and stored as
 * the nghttp2 stream user data, which is how the other request callbacks
 * find it again.
 *
 * @param session: nghttp2 session the frame belongs to.
 * @param frame: frame whose header block is starting.
 * @param cb_arg: the struct http2_session of this connection.
 * @return NGHTTP2_ERR_CALLBACK_FAILURE when the stream cannot be created,
 * and on the path that logs a set_stream_user_data failure.
 * NOTE(review): this listing has gaps (braces, the declaration of ret, the
 * test on ret and the non-failure returns are not shown); the success
 * value is presumably 0 -- confirm against the complete file. */
2605 static int http2_req_begin_headers_cb(nghttp2_session* session,
2606 const nghttp2_frame* frame, void* cb_arg)
2608 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2609 struct http2_stream* h2_stream;
2611 if(frame->hd.type != NGHTTP2_HEADERS ||
2612 frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2613 /* only interested in request headers */
/* allocate the per-stream state, keyed by the HTTP/2 stream id */
2616 if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) {
2617 log_err("malloc failure while creating http2 stream");
2618 return NGHTTP2_ERR_CALLBACK_FAILURE;
/* the stream is added to the session before it is stored as user data, so
 * on the failure path below it is still linked into the session */
2620 http2_session_add_stream(h2_session, h2_stream);
2621 ret = nghttp2_session_set_stream_user_data(session,
2622 frame->hd.stream_id, h2_stream);
2624 /* stream does not exist */
2625 verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2626 "error: %s", nghttp2_strerror(ret));
2627 return NGHTTP2_ERR_CALLBACK_FAILURE;
/**
2634 * base64url decode, store in qbuffer
2635 * @param h2_session: http2 session
2636 * @param h2_stream: http2 stream
2637 * @param start: start of the base64 string
2638 * @param length: length of the base64 string
2639 * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer,
2640 * qbuffer will be NULL if the query is unparseable.
 */
/* Decode the base64 text [start, start+length) taken from the request path
 * into a newly allocated h2_stream->qbuffer.
 *
 * The buffer is sized with sldns_b64_pton_calculate_size(length). That size
 * is added to the global http2_query_buffer_count (guarded by
 * http2_query_buffer_count_lock) before the allocation and subtracted again
 * on every path that frees the buffer or fails to allocate it. When the
 * global limit http2_query_buffer_max would be exceeded the stream is reset
 * through http2_submit_rst_stream() and its result is returned.
 *
 * NOTE(review): this listing has gaps; the declarations of b64len and buf,
 * several returns, an else and closing braces are not shown -- confirm
 * against the complete file. */
2642 static int http2_buffer_uri_query(struct http2_session* h2_session,
2643 struct http2_stream* h2_stream, const uint8_t* start, size_t length)
2645 size_t expectb64len;
2647 if(h2_stream->http_method == HTTP_METHOD_POST)
/* a second query for the same stream is reported as a failure */
2651 if(h2_stream->qbuffer) {
2652 verbose(VERB_ALGO, "http2_req_header fail, "
2653 "qbuffer already set");
2657 /* calculate size, might be a bit bigger than the real
2658 * decoded buffer size */
2659 expectb64len = sldns_b64_pton_calculate_size(length);
2660 log_assert(expectb64len > 0);
2662 h2_session->c->http2_stream_max_qbuffer_size) {
2663 h2_stream->query_too_large = 1;
/* reserve the space in the global query buffer accounting before the
 * buffer is allocated */
2667 lock_basic_lock(&http2_query_buffer_count_lock);
2668 if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) {
2669 lock_basic_unlock(&http2_query_buffer_count_lock);
2670 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2671 "in http2-query-buffer-size");
2672 return http2_submit_rst_stream(h2_session, h2_stream);
2674 http2_query_buffer_count += expectb64len;
2675 lock_basic_unlock(&http2_query_buffer_count_lock);
2676 if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) {
/* allocation failed: give the reserved space back */
2677 lock_basic_lock(&http2_query_buffer_count_lock);
2678 http2_query_buffer_count -= expectb64len;
2679 lock_basic_unlock(&http2_query_buffer_count_lock);
2680 log_err("http2_req_header fail, qbuffer "
/* input flagged by sldns_b64_contains_nonurl() is decoded with
 * sldns_b64_pton(), which is given a zero terminated copy of the input;
 * other input is decoded in place with sldns_b64url_pton() further down */
2685 if(sldns_b64_contains_nonurl((char const*)start, length)) {
2687 verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding");
2688 /* copy to the scratch buffer temporarily to terminate the
2689 * string with a zero */
2690 if(length+1 > sizeof(buf)) {
/* input does not fit the scratch buffer: release the qbuffer and its
 * accounting */
2692 lock_basic_lock(&http2_query_buffer_count_lock);
2693 http2_query_buffer_count -= expectb64len;
2694 lock_basic_unlock(&http2_query_buffer_count_lock);
2695 sldns_buffer_free(h2_stream->qbuffer);
2696 h2_stream->qbuffer = NULL;
2699 memmove(buf, start, length);
/* a decode result of zero or below is treated as undecodable input: the
 * qbuffer and its accounting are released */
2701 if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current(
2702 h2_stream->qbuffer), expectb64len)) || b64len < 0) {
2703 lock_basic_lock(&http2_query_buffer_count_lock);
2704 http2_query_buffer_count -= expectb64len;
2705 lock_basic_unlock(&http2_query_buffer_count_lock);
2706 sldns_buffer_free(h2_stream->qbuffer);
2707 h2_stream->qbuffer = NULL;
/* NOTE(review): presumably the else branch of the nonurl test above (the
 * else itself is not visible in this listing): base64url input is decoded
 * straight from the header value */
2711 if(!(b64len = sldns_b64url_pton(
2712 (char const *)start, length,
2713 sldns_buffer_current(h2_stream->qbuffer),
2714 expectb64len)) || b64len < 0) {
2715 lock_basic_lock(&http2_query_buffer_count_lock);
2716 http2_query_buffer_count -= expectb64len;
2717 lock_basic_unlock(&http2_query_buffer_count_lock);
2718 sldns_buffer_free(h2_stream->qbuffer);
2719 h2_stream->qbuffer = NULL;
2720 /* return without error, method can be an
2725 sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len);
2729 /** nghttp2 callback. Used to parse headers from HEADER frames.
 *
 * Receives a single header name/value pair and records what it means for
 * the stream stored as nghttp2 user data for frame->hd.stream_id:
 * - :method selects HTTP_METHOD_GET or HTTP_METHOD_POST; for POST a
 *   qbuffer that already exists is released and its capacity subtracted
 *   from http2_query_buffer_count.
 * - :path is compared with c->http_endpoint and the "?dns=" parameter; the
 *   text after the parameter is passed to http2_buffer_uri_query().
 * - content-type other than "application/dns-message" sets
 *   invalid_content_type.
 * - content-length is stored in content_length and sets query_too_large
 *   when it exceeds c->http2_stream_max_qbuffer_size.
 *
 * @param session: nghttp2 session the frame belongs to.
 * @param frame: frame that carries the header.
 * @param name: header name, namelen bytes.
 * @param namelen: length of name.
 * @param value: header value, valuelen bytes.
 * @param valuelen: length of value.
 * @return NGHTTP2_ERR_CALLBACK_FAILURE when http2_buffer_uri_query() fails.
 * NOTE(review): this listing has gaps (the cb_arg parameter line, braces,
 * comment terminators, several tests and the non-failure returns are not
 * shown); the success value is presumably 0 -- confirm against the
 * complete file. */
2730 static int http2_req_header_cb(nghttp2_session* session,
2731 const nghttp2_frame* frame, const uint8_t* name, size_t namelen,
2732 const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags),
2735 struct http2_stream* h2_stream = NULL;
2736 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2737 /* nghttp2 deals with CONTINUATION frames and provides them as part of
2739 if(frame->hd.type != NGHTTP2_HEADERS ||
2740 frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2741 /* only interested in request headers */
2744 if(!(h2_stream = nghttp2_session_get_stream_user_data(session,
2745 frame->hd.stream_id)))
2748 /* earlier checks already indicate we can stop handling this query */
2749 if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED ||
2750 h2_stream->invalid_content_type ||
2751 h2_stream->invalid_endpoint)
2755 /* nghttp2 performs some sanity checks in the headers, including:
2756 * name and value are guaranteed to be null terminated
2757 * name is guaranteed to be lowercase
2758 * content-length value is guaranteed to contain digits
2761 if(!h2_stream->http_method && namelen == 7 &&
2762 memcmp(":method", name, namelen) == 0) {
2763 /* Case insensitive check on :method value to be on the safe
2764 * side. I failed to find text about case sensitivity in specs.
2766 if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0)
2767 h2_stream->http_method = HTTP_METHOD_GET;
2768 else if(valuelen == 4 &&
2769 strcasecmp("POST", (const char*)value) == 0) {
2770 h2_stream->http_method = HTTP_METHOD_POST;
2771 if(h2_stream->qbuffer) {
2772 /* POST method uses query from DATA frames */
2773 lock_basic_lock(&http2_query_buffer_count_lock);
2774 http2_query_buffer_count -=
2775 sldns_buffer_capacity(h2_stream->qbuffer);
2776 lock_basic_unlock(&http2_query_buffer_count_lock);
2777 sldns_buffer_free(h2_stream->qbuffer);
2778 h2_stream->qbuffer = NULL;
/* NOTE(review): presumably the else branch for any other :method value;
 * the else itself is not visible in this listing -- confirm */
2781 h2_stream->http_method = HTTP_METHOD_UNSUPPORTED;
2784 if(namelen == 5 && memcmp(":path", name, namelen) == 0) {
2785 /* :path may contain DNS query, depending on method. Method might
2786 * not be known yet here, so check after finishing receiving
2788 #define HTTP_QUERY_PARAM "?dns="
2789 size_t el = strlen(h2_session->c->http_endpoint);
2790 size_t qpl = strlen(HTTP_QUERY_PARAM);
2792 if(valuelen < el || memcmp(h2_session->c->http_endpoint,
2794 h2_stream->invalid_endpoint = 1;
2797 /* larger than endpoint only allowed if it is for the query
2799 if(valuelen <= el+qpl ||
2800 memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) {
2802 h2_stream->invalid_endpoint = 1;
2806 if(!http2_buffer_uri_query(h2_session, h2_stream,
2807 value+(el+qpl), valuelen-(el+qpl))) {
2808 return NGHTTP2_ERR_CALLBACK_FAILURE;
2812 /* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST,
2813 * and not needed when using GET. Don't enforce.
2814 * If set only allow lowercase "application/dns-message".
2816 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST
2817 * be able to handle "application/dns-message". Since that is the only
2818 * content-type supported we can ignore the accept header.
2820 if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) {
2821 if(valuelen != 23 || memcmp("application/dns-message", value,
2823 h2_stream->invalid_content_type = 1;
2827 /* Only interested in content-length for POST (or not yet known) method.
2829 if((!h2_stream->http_method ||
2830 h2_stream->http_method == HTTP_METHOD_POST) &&
2831 !h2_stream->content_length && namelen == 14 &&
2832 memcmp("content-length", name, namelen) == 0) {
2834 h2_stream->query_too_large = 1;
2837 /* guaranteed to only contain digits and be null terminated */
/* NOTE(review): atoi() cannot report overflow. The query_too_large
 * assignment just above presumably belongs to a length guard on a line
 * not shown in this listing -- confirm that guard bounds valuelen, or
 * parse with strtol() instead. */
2838 h2_stream->content_length = atoi((const char*)value);
2839 if(h2_stream->content_length >
2840 h2_session->c->http2_stream_max_qbuffer_size) {
2841 h2_stream->query_too_large = 1;
2848 /** nghttp2 callback. Used to get data from DATA frames, which can contain
2849 * queries in POST requests.
 *
 * The chunk is appended to h2_stream->qbuffer. The buffer is created on
 * the first chunk with a size of qlen bytes; qlen is taken from the
 * content-length value when one was seen. The size is added to the global
 * http2_query_buffer_count (guarded by http2_query_buffer_count_lock) and
 * the stream is reset with http2_submit_rst_stream() when that would
 * exceed http2_query_buffer_max. A chunk that does not fit in the buffer
 * marks the stream query_too_large and is not written.
 *
 * @param stream_id: id of the stream the chunk belongs to.
 * @param data: chunk payload.
 * @param len: number of bytes in data.
 * @param cb_arg: the struct http2_session of this connection.
 * @return NGHTTP2_ERR_CALLBACK_FAILURE when a chunk is larger than the
 * advertised content-length; the result of http2_submit_rst_stream() when
 * the global limit is hit.
 * NOTE(review): this listing has gaps (the declaration of qlen, its
 * assignment when no content-length was seen, braces and the non-failure
 * returns are not shown); the success value is presumably 0 -- confirm
 * against the complete file. */
2850 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session),
2851 uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data,
2852 size_t len, void* cb_arg)
2854 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2855 struct http2_stream* h2_stream;
/* the session callback argument is unused; the nghttp2 session is taken
 * from h2_session instead */
2858 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2859 h2_session->session, stream_id))) {
2863 if(h2_stream->query_too_large)
/* first chunk for this stream: work out how large the qbuffer must be */
2866 if(!h2_stream->qbuffer) {
2867 if(h2_stream->content_length) {
2868 if(h2_stream->content_length < len)
2869 /* getting more data in DATA frame than
2870 * advertised in content-length header. */
2871 return NGHTTP2_ERR_CALLBACK_FAILURE;
2872 qlen = h2_stream->content_length;
2873 } else if(len <= h2_session->c->http2_stream_max_qbuffer_size) {
2874 /* setting this to msg-buffer-size can result in a lot
2875 * of memory consumption. Most queries should fit in a
2876 * single DATA frame, and most POST queries will
2877 * contain content-length which does not impose this
2882 if(!h2_stream->qbuffer && qlen) {
2883 lock_basic_lock(&http2_query_buffer_count_lock);
2884 if(http2_query_buffer_count + qlen > http2_query_buffer_max) {
2885 lock_basic_unlock(&http2_query_buffer_count_lock);
2886 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2887 "in http2-query-buffer-size");
2888 return http2_submit_rst_stream(h2_session, h2_stream);
2890 http2_query_buffer_count += qlen;
2891 lock_basic_unlock(&http2_query_buffer_count_lock);
2892 if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) {
2893 lock_basic_lock(&http2_query_buffer_count_lock);
2894 http2_query_buffer_count -= qlen;
2895 lock_basic_unlock(&http2_query_buffer_count_lock);
2899 if(!h2_stream->qbuffer ||
2900 sldns_buffer_remaining(h2_stream->qbuffer) < len) {
2901 verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough "
2902 "buffer space for POST query. Can happen on multi "
2903 "frame requests without content-length header");
2904 h2_stream->query_too_large = 1;
2908 sldns_buffer_write(h2_stream->qbuffer, data, len);
/** Release the query and response buffers of a stream.
 *
 * For each of qbuffer and rbuffer that is set, the capacity of the buffer
 * is subtracted from the matching global counter while holding the lock of
 * that counter, the buffer is freed and the pointer is reset to NULL.
 *
 * @param h2_stream: stream whose buffers are released; it is dereferenced
 * without a NULL check.
 * NOTE(review): the braces of this function are not visible in this
 * listing. */
2913 void http2_req_stream_clear(struct http2_stream* h2_stream)
/* query buffer, accounted in http2_query_buffer_count */
2915 if(h2_stream->qbuffer) {
2916 lock_basic_lock(&http2_query_buffer_count_lock);
2917 http2_query_buffer_count -=
2918 sldns_buffer_capacity(h2_stream->qbuffer);
2919 lock_basic_unlock(&http2_query_buffer_count_lock);
2920 sldns_buffer_free(h2_stream->qbuffer);
2921 h2_stream->qbuffer = NULL;
/* response buffer, accounted in http2_response_buffer_count */
2923 if(h2_stream->rbuffer) {
2924 lock_basic_lock(&http2_response_buffer_count_lock);
2925 http2_response_buffer_count -=
2926 sldns_buffer_capacity(h2_stream->rbuffer);
2927 lock_basic_unlock(&http2_response_buffer_count_lock);
2928 sldns_buffer_free(h2_stream->rbuffer);
2929 h2_stream->rbuffer = NULL;
/** Create the nghttp2 callbacks object used for incoming HTTP/2 requests.
 *
 * Allocates the object with nghttp2_session_callbacks_new() and registers
 * the request callbacks of this file (begin headers, frame received,
 * header, data chunk) together with the generic receive, send and stream
 * close callbacks.
 *
 * @return presumably the callbacks object, or NULL when it cannot be
 * allocated -- the return statements are not visible in this listing,
 * confirm against the complete file. */
2933 nghttp2_session_callbacks* http2_req_callbacks_create(void)
2935 nghttp2_session_callbacks *callbacks;
/* NOTE(review): only NGHTTP2_ERR_NOMEM is treated as failure here; a test
 * for any nonzero result would also cover other error codes -- confirm
 * against the nghttp2 documentation */
2936 if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) {
2937 log_err("failed to initialize nghttp2 callback");
2940 /* reception of header block started, used to create h2_stream */
2941 nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks,
2942 http2_req_begin_headers_cb);
2943 /* complete frame received, used to get data from stream if frame
2944 * has end stream flag, and start processing query */
2945 nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks,
2946 http2_req_frame_recv_cb);
2947 /* get request info from headers */
2948 nghttp2_session_callbacks_set_on_header_callback(callbacks,
2949 http2_req_header_cb);
2950 /* get data from DATA frames, containing POST query */
2951 nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks,
2952 http2_req_data_chunk_recv_cb);
2954 /* generic HTTP2 callbacks */
2955 nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb);
2956 nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb);
2957 nghttp2_session_callbacks_set_on_stream_close_callback(callbacks,
2958 http2_stream_close_cb);
2962 #endif /* HAVE_NGHTTP2 */