2 * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
6 * This software is open source.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 * This file has functions to get queries from clients.
42 #ifdef HAVE_SYS_TYPES_H
43 # include <sys/types.h>
46 #ifdef USE_TCP_FASTOPEN
47 #include <netinet/tcp.h>
49 #include "services/listen_dnsport.h"
50 #include "services/outside_network.h"
51 #include "util/netevent.h"
53 #include "util/config_file.h"
54 #include "util/net_help.h"
55 #include "sldns/sbuffer.h"
56 #include "sldns/parseutil.h"
57 #include "services/mesh.h"
58 #include "util/fptr_wlist.h"
59 #include "util/locks.h"
71 #include <systemd/sd-daemon.h>
81 /** number of queued TCP connections for listen() */
82 #define TCP_BACKLOG 256
84 /** number of simultaneous requests a client can have */
85 #define TCP_MAX_REQ_SIMULTANEOUS 32
87 #ifndef THREADS_DISABLED
88 /** lock on the counter of stream buffer memory */
89 static lock_basic_type stream_wait_count_lock;
90 /** lock on the counter of HTTP2 query buffer memory */
91 static lock_basic_type http2_query_buffer_count_lock;
92 /** lock on the counter of HTTP2 response buffer memory */
93 static lock_basic_type http2_response_buffer_count_lock;
95 /** size (in bytes) of stream wait buffers */
96 static size_t stream_wait_count = 0;
97 /** is the lock initialised for stream wait buffers */
98 static int stream_wait_lock_inited = 0;
99 /** size (in bytes) of HTTP2 query buffers */
100 static size_t http2_query_buffer_count = 0;
101 /** is the lock initialised for HTTP2 query buffers */
102 static int http2_query_buffer_lock_inited = 0;
103 /** size (in bytes) of HTTP2 response buffers */
104 static size_t http2_response_buffer_count = 0;
105 /** is the lock initialised for HTTP2 response buffers */
106 static int http2_response_buffer_lock_inited = 0;
109 * Debug print of the getaddrinfo returned address.
110 * @param addr: the address returned.
113 verbose_print_addr(struct addrinfo *addr)
115 if(verbosity >= VERB_ALGO) {
117 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
119 if(addr->ai_family == AF_INET6)
120 sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
123 if(inet_ntop(addr->ai_family, sinaddr, buf,
124 (socklen_t)sizeof(buf)) == 0) {
125 (void)strlcpy(buf, "(null)", sizeof(buf));
127 buf[sizeof(buf)-1] = 0;
128 verbose(VERB_ALGO, "creating %s%s socket %s %d",
129 addr->ai_socktype==SOCK_DGRAM?"udp":
130 addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
131 addr->ai_family==AF_INET?"4":
132 addr->ai_family==AF_INET6?"6":
134 ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
140 systemd_get_activated(int family, int socktype, int listen,
141 struct sockaddr *addr, socklen_t addrlen,
147 const char* listen_pid, *listen_fds;
149 /* We should use "listen" option only for stream protocols. For UDP it should be -1 */
151 if((r = sd_booted()) < 1) {
153 log_warn("systemd is not running");
155 log_err("systemd sd_booted(): %s", strerror(-r));
159 listen_pid = getenv("LISTEN_PID");
160 listen_fds = getenv("LISTEN_FDS");
163 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID");
168 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS");
172 if((r = sd_listen_fds(0)) < 1) {
174 log_warn("systemd: did not return socket, check unit configuration");
176 log_err("systemd sd_listen_fds(): %s", strerror(-r));
180 for(i = 0; i < r; i++) {
181 if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) {
182 s = SD_LISTEN_FDS_START + i;
188 log_err_addr("systemd sd_listen_fds()",
190 (struct sockaddr_storage *)addr, addrlen);
192 log_err("systemd sd_listen_fds(): %s", path);
199 create_udp_sock(int family, int socktype, struct sockaddr* addr,
200 socklen_t addrlen, int v6only, int* inuse, int* noproto,
201 int rcv, int snd, int listen, int* reuseport, int transparent,
202 int freebind, int use_systemd, int dscp)
206 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY)
210 int mtu = IPV6_MIN_MTU;
212 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
215 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
221 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
224 #if !defined(IP_FREEBIND)
228 int got_fd_from_systemd = 0;
232 && (s = systemd_get_activated(family, socktype, -1, addr,
233 addrlen, NULL)) == -1)) {
237 if((s = socket(family, socktype, 0)) == -1) {
240 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
245 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
246 WSAGetLastError() == WSAEPROTONOSUPPORT) {
251 log_err("can't create socket: %s", sock_strerror(errno));
257 got_fd_from_systemd = 1;
262 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
263 (socklen_t)sizeof(on)) < 0) {
264 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
265 sock_strerror(errno));
267 if(errno != ENOSYS) {
280 #endif /* SO_REUSEADDR */
282 # ifdef SO_REUSEPORT_LB
283 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance
284 * like SO_REUSEPORT on Linux. This is what the users want
285 * with the config option in unbound.conf; if we actually
286 * need local address and port reuse they'll also need to
287 * have SO_REUSEPORT set for them, assume it was _LB they want.
289 if (reuseport && *reuseport &&
290 setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on,
291 (socklen_t)sizeof(on)) < 0) {
293 if(errno != ENOPROTOOPT || verbosity >= 3)
294 log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s",
297 /* this option is not essential, we can continue */
300 # else /* no SO_REUSEPORT_LB */
302 /* try to set SO_REUSEPORT so that incoming
303 * queries are distributed evenly among the receiving threads.
304 * Each thread must have its own socket bound to the same port,
305 * with SO_REUSEPORT set on each socket.
307 if (reuseport && *reuseport &&
308 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
309 (socklen_t)sizeof(on)) < 0) {
311 if(errno != ENOPROTOOPT || verbosity >= 3)
312 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
315 /* this option is not essential, we can continue */
318 # endif /* SO_REUSEPORT_LB */
321 #endif /* defined(SO_REUSEPORT) */
322 #ifdef IP_TRANSPARENT
324 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
325 (socklen_t)sizeof(on)) < 0) {
326 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
329 #elif defined(IP_BINDANY)
331 setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
332 (family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
333 (void*)&on, (socklen_t)sizeof(on)) < 0) {
334 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
335 (family==AF_INET6?"V6":""), strerror(errno));
337 #elif defined(SO_BINDANY)
339 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on,
340 (socklen_t)sizeof(on)) < 0) {
341 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
344 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
348 setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
349 (socklen_t)sizeof(on)) < 0) {
350 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
353 #endif /* IP_FREEBIND */
357 socklen_t slen = (socklen_t)sizeof(got);
358 # ifdef SO_RCVBUFFORCE
359 /* Linux specific: try to use root permission to override
360 * system limits on rcvbuf. The limit is stored in
361 * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
362 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv,
363 (socklen_t)sizeof(rcv)) < 0) {
365 log_err("setsockopt(..., SO_RCVBUFFORCE, "
366 "...) failed: %s", sock_strerror(errno));
372 # endif /* SO_RCVBUFFORCE */
373 if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv,
374 (socklen_t)sizeof(rcv)) < 0) {
375 log_err("setsockopt(..., SO_RCVBUF, "
376 "...) failed: %s", sock_strerror(errno));
382 /* check if we got the right thing or if system
383 * reduced to some system max. Warn if so */
384 if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got,
385 &slen) >= 0 && got < rcv/2) {
386 log_warn("so-rcvbuf %u was not granted. "
387 "Got %u. To fix: start with "
388 "root permissions(linux) or sysctl "
389 "bigger net.core.rmem_max(linux) or "
390 "kern.ipc.maxsockbuf(bsd) values.",
391 (unsigned)rcv, (unsigned)got);
393 # ifdef SO_RCVBUFFORCE
396 #endif /* SO_RCVBUF */
398 /* first do RCVBUF as the receive buffer is more important */
402 socklen_t slen = (socklen_t)sizeof(got);
403 # ifdef SO_SNDBUFFORCE
404 /* Linux specific: try to use root permission to override
405 * system limits on sndbuf. The limit is stored in
406 * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
407 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd,
408 (socklen_t)sizeof(snd)) < 0) {
410 log_err("setsockopt(..., SO_SNDBUFFORCE, "
411 "...) failed: %s", sock_strerror(errno));
417 # endif /* SO_SNDBUFFORCE */
418 if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd,
419 (socklen_t)sizeof(snd)) < 0) {
420 log_err("setsockopt(..., SO_SNDBUF, "
421 "...) failed: %s", sock_strerror(errno));
427 /* check if we got the right thing or if system
428 * reduced to some system max. Warn if so */
429 if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got,
430 &slen) >= 0 && got < snd/2) {
431 log_warn("so-sndbuf %u was not granted. "
432 "Got %u. To fix: start with "
433 "root permissions(linux) or sysctl "
434 "bigger net.core.wmem_max(linux) or "
435 "kern.ipc.maxsockbuf(bsd) values.",
436 (unsigned)snd, (unsigned)got);
438 # ifdef SO_SNDBUFFORCE
441 #endif /* SO_SNDBUF */
443 err = set_ip_dscp(s, family, dscp);
445 log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err);
446 if(family == AF_INET6) {
447 # if defined(IPV6_V6ONLY)
449 int val=(v6only==2)?0:1;
450 if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
451 (void*)&val, (socklen_t)sizeof(val)) < 0) {
452 log_err("setsockopt(..., IPV6_V6ONLY"
453 ", ...) failed: %s", sock_strerror(errno));
461 # if defined(IPV6_USE_MIN_MTU)
463 * There is no fragmentation of IPv6 datagrams
464 * during forwarding in the network. Therefore
465 * we do not send UDP datagrams larger than
466 * the minimum IPv6 MTU of 1280 octets. The
467 * EDNS0 message length can be larger if the
468 * network stack supports IPV6_USE_MIN_MTU.
470 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
471 (void*)&on, (socklen_t)sizeof(on)) < 0) {
472 log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
473 "...) failed: %s", sock_strerror(errno));
479 # elif defined(IPV6_MTU)
481 * On Linux, to send no larger than 1280, the PMTUD is
482 * disabled by default for datagrams anyway, so we set
485 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
486 (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
487 log_err("setsockopt(..., IPV6_MTU, ...) failed: %s",
488 sock_strerror(errno));
494 # endif /* IPv6 MTU */
495 } else if(family == AF_INET) {
496 # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
497 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
498 * PMTU information is not accepted, but fragmentation is allowed
499 * if and only if the packet size exceeds the outgoing interface MTU
500 * (and also uses the interface mtu to determine the size of the packets).
501 * So there won't be any EMSGSIZE error. Against DNS fragmentation attacks.
502 * FreeBSD already has same semantics without setting the option. */
505 # if defined(IP_PMTUDISC_OMIT)
506 action = IP_PMTUDISC_OMIT;
507 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
508 &action, (socklen_t)sizeof(action)) < 0) {
510 if (errno != EINVAL) {
511 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
525 action = IP_PMTUDISC_DONT;
526 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
527 &action, (socklen_t)sizeof(action)) < 0) {
528 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
536 # elif defined(IP_DONTFRAG)
538 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG,
539 &off, (socklen_t)sizeof(off)) < 0) {
540 log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
547 # endif /* IPv4 MTU */
551 !got_fd_from_systemd &&
553 bind(s, (struct sockaddr*)addr, addrlen) != 0) {
558 *inuse = (errno == EADDRINUSE);
559 /* detect freebsd jail with no ipv6 permission */
560 if(family==AF_INET6 && errno==EINVAL)
562 else if(errno != EADDRINUSE &&
563 !(errno == EACCES && verbosity < 4 && !listen)
565 && !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen)
568 log_err_addr("can't bind socket", strerror(errno),
569 (struct sockaddr_storage*)addr, addrlen);
571 #endif /* EADDRINUSE */
572 #else /* USE_WINSOCK */
573 if(WSAGetLastError() != WSAEADDRINUSE &&
574 WSAGetLastError() != WSAEADDRNOTAVAIL &&
575 !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) {
576 log_err_addr("can't bind socket",
577 wsa_strerror(WSAGetLastError()),
578 (struct sockaddr_storage*)addr, addrlen);
580 #endif /* USE_WINSOCK */
584 if(!fd_set_nonblock(s)) {
594 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
595 int* reuseport, int transparent, int mss, int nodelay, int freebind,
596 int use_systemd, int dscp)
600 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY)
604 int got_fd_from_systemd = 0;
606 #ifdef USE_TCP_FASTOPEN
609 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
612 #if !defined(IP_FREEBIND)
615 verbose_print_addr(addr);
620 && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
621 addr->ai_addr, addr->ai_addrlen,
626 if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
628 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
633 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
634 WSAGetLastError() == WSAEPROTONOSUPPORT) {
639 log_err("can't create socket: %s", sock_strerror(errno));
643 #if defined(IPPROTO_TCP) && defined(TCP_NODELAY)
644 if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on,
645 (socklen_t)sizeof(on)) < 0) {
647 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
650 log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
651 wsa_strerror(WSAGetLastError()));
655 log_warn(" setsockopt(TCP_NODELAY) unsupported");
656 #endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */
659 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
660 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
661 (socklen_t)sizeof(mss)) < 0) {
662 log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
663 sock_strerror(errno));
666 " tcp socket mss set to %d", mss);
669 log_warn(" setsockopt(TCP_MAXSEG) unsupported");
670 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
674 got_fd_from_systemd = 1;
678 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
679 (socklen_t)sizeof(on)) < 0) {
680 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
681 sock_strerror(errno));
685 #endif /* SO_REUSEADDR */
687 if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
688 (socklen_t)sizeof(on)) < 0) {
689 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
692 #endif /* IP_FREEBIND */
694 /* try to set SO_REUSEPORT so that incoming
695 * connections are distributed evenly among the receiving threads.
696 * Each thread must have its own socket bound to the same port,
697 * with SO_REUSEPORT set on each socket.
699 if (reuseport && *reuseport &&
700 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
701 (socklen_t)sizeof(on)) < 0) {
703 if(errno != ENOPROTOOPT || verbosity >= 3)
704 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
707 /* this option is not essential, we can continue */
712 #endif /* defined(SO_REUSEPORT) */
713 #if defined(IPV6_V6ONLY)
714 if(addr->ai_family == AF_INET6 && v6only) {
715 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
716 (void*)&on, (socklen_t)sizeof(on)) < 0) {
717 log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
718 sock_strerror(errno));
725 #endif /* IPV6_V6ONLY */
726 #ifdef IP_TRANSPARENT
728 setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
729 (socklen_t)sizeof(on)) < 0) {
730 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
733 #elif defined(IP_BINDANY)
735 setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
736 (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
737 (void*)&on, (socklen_t)sizeof(on)) < 0) {
738 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
739 (addr->ai_family==AF_INET6?"V6":""), strerror(errno));
741 #elif defined(SO_BINDANY)
743 setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
745 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
748 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
749 err = set_ip_dscp(s, addr->ai_family, dscp);
751 log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err);
754 !got_fd_from_systemd &&
756 bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
758 /* detect freebsd jail with no ipv6 permission */
759 if(addr->ai_family==AF_INET6 && errno==EINVAL)
762 log_err_addr("can't bind socket", strerror(errno),
763 (struct sockaddr_storage*)addr->ai_addr,
767 log_err_addr("can't bind socket",
768 wsa_strerror(WSAGetLastError()),
769 (struct sockaddr_storage*)addr->ai_addr,
775 if(!fd_set_nonblock(s)) {
779 if(listen(s, TCP_BACKLOG) == -1) {
780 log_err("can't listen: %s", sock_strerror(errno));
784 #ifdef USE_TCP_FASTOPEN
785 /* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
786 against IP spoofing attacks as suggested in RFC7413 */
788 /* OS X implementation only supports qlen of 1 via this call. Actual
789 value is configured by the net.inet.tcp.fastopen_backlog kernel parm. */
792 /* 5 is recommended on linux */
795 if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen,
796 sizeof(qlen))) == -1 ) {
798 /* squelch ENOPROTOOPT: freebsd server mode with kernel support
799 disabled, except when verbosity enabled for debugging */
800 if(errno != ENOPROTOOPT || verbosity >= 3) {
803 log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
805 log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
816 set_ip_dscp(int socket, int addrfamily, int dscp)
825 if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds, sizeof(ds)) < 0)
826 return sock_strerror(errno);
829 if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0)
830 return sock_strerror(errno);
837 create_local_accept_sock(const char *path, int* noproto, int use_systemd)
842 if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1, NULL, 0, path)) != -1)
848 struct sockaddr_un usock;
853 verbose(VERB_ALGO, "creating unix socket %s", path);
854 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
855 /* this member exists on BSDs, not Linux */
856 usock.sun_len = (unsigned)sizeof(usock);
858 usock.sun_family = AF_LOCAL;
859 /* length is 92-108, 104 on FreeBSD */
860 (void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));
862 if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
863 log_err("Cannot create local socket %s (%s)",
864 path, strerror(errno));
868 if (unlink(path) && errno != ENOENT) {
869 /* The socket already exists and cannot be removed */
870 log_err("Cannot remove old local socket %s (%s)",
871 path, strerror(errno));
875 if (bind(s, (struct sockaddr *)&usock,
876 (socklen_t)sizeof(struct sockaddr_un)) == -1) {
877 log_err("Cannot bind local socket %s (%s)",
878 path, strerror(errno));
882 if (!fd_set_nonblock(s)) {
883 log_err("Cannot set non-blocking mode");
887 if (listen(s, TCP_BACKLOG) == -1) {
888 log_err("can't listen: %s", strerror(errno));
892 (void)noproto; /*unused*/
905 log_err("Local sockets are not supported");
913 * Create socket from getaddrinfo results
916 make_sock(int stype, const char* ifname, const char* port,
917 struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
918 int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
919 int use_systemd, int dscp)
921 struct addrinfo *res = NULL;
922 int r, s, inuse, noproto;
923 hints->ai_socktype = stype;
925 if((r=getaddrinfo(ifname, port, hints, &res)) != 0 || !res) {
927 if(r == EAI_NONAME && hints->ai_family == AF_INET6){
928 *noip6 = 1; /* 'Host not found' for IP6 on winXP */
932 log_err("node %s:%s getaddrinfo: %s %s",
933 ifname?ifname:"default", port, gai_strerror(r),
935 r==EAI_SYSTEM?(char*)strerror(errno):""
942 if(stype == SOCK_DGRAM) {
943 verbose_print_addr(res);
944 s = create_udp_sock(res->ai_family, res->ai_socktype,
945 (struct sockaddr*)res->ai_addr, res->ai_addrlen,
946 v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
947 reuseport, transparent, freebind, use_systemd, dscp);
948 if(s == -1 && inuse) {
949 log_err("bind: address already in use");
950 } else if(s == -1 && noproto && hints->ai_family == AF_INET6){
954 s = create_tcp_accept_sock(res, v6only, &noproto, reuseport,
955 transparent, tcp_mss, nodelay, freebind, use_systemd,
957 if(s == -1 && noproto && hints->ai_family == AF_INET6){
965 /** make socket and first see if ifname contains port override info */
967 make_sock_port(int stype, const char* ifname, const char* port,
968 struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
969 int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
970 int use_systemd, int dscp)
972 char* s = strchr(ifname, '@');
974 /* override port with ifspec@port */
977 if((size_t)(s-ifname) >= sizeof(newif)) {
978 log_err("ifname too long: %s", ifname);
982 if(strlen(s+1) >= sizeof(p)) {
983 log_err("portnumber too long: %s", ifname);
987 (void)strlcpy(newif, ifname, sizeof(newif));
989 (void)strlcpy(p, s+1, sizeof(p));
991 return make_sock(stype, newif, p, hints, v6only, noip6, rcv,
992 snd, reuseport, transparent, tcp_mss, nodelay, freebind,
995 return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd,
996 reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd,
1001 * Add port to open ports list.
1002 * @param list: list head. changed.
1004 * @param ftype: the listen type of the fd (udp, tcp, ssl, etc).
1005 * @return false on failure. list is unchanged then.
1008 port_insert(struct listen_port** list, int s, enum listen_type ftype)
1010 struct listen_port* item = (struct listen_port*)malloc(
1011 sizeof(struct listen_port));
1016 item->ftype = ftype;
1021 /** set fd to receive source address packet info */
1023 set_recvpktinfo(int s, int family)
1025 #if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
1030 if(family == AF_INET6) {
1031 # ifdef IPV6_RECVPKTINFO
1032 if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
1033 (void*)&on, (socklen_t)sizeof(on)) < 0) {
1034 log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
1038 # elif defined(IPV6_PKTINFO)
1039 if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
1040 (void*)&on, (socklen_t)sizeof(on)) < 0) {
1041 log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
1046 log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
1047 "disable interface-automatic or do-ip6 in config");
1049 # endif /* defined IPV6_RECVPKTINFO */
1051 } else if(family == AF_INET) {
1053 if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
1054 (void*)&on, (socklen_t)sizeof(on)) < 0) {
1055 log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
1059 # elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
1060 if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
1061 (void*)&on, (socklen_t)sizeof(on)) < 0) {
1062 log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
1067 log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
1068 "interface-automatic or do-ip4 in config");
1070 # endif /* IP_PKTINFO */
1076 /** see if interface is ssl, its port number == the ssl port number */
1078 if_is_ssl(const char* ifname, const char* port, int ssl_port,
1079 struct config_strlist* tls_additional_port)
1081 struct config_strlist* s;
1082 char* p = strchr(ifname, '@');
1083 if(!p && atoi(port) == ssl_port)
1085 if(p && atoi(p+1) == ssl_port)
1087 for(s = tls_additional_port; s; s = s->next) {
1088 if(p && atoi(p+1) == atoi(s->str))
1090 if(!p && atoi(port) == atoi(s->str))
1096 /** see if interface is https, its port number == the https port number */
1098 if_is_https(const char* ifname, const char* port, int https_port)
1100 char* p = strchr(ifname, '@');
1101 if(!p && atoi(port) == https_port)
1103 if(p && atoi(p+1) == https_port)
1109 * Helper for ports_open. Creates one interface (or NULL for default).
1110 * @param ifname: The interface ip address.
1111 * @param do_auto: use automatic interface detection.
1112 * If enabled, then ifname must be the wildcard name.
1113 * @param do_udp: if udp should be used.
1114 * @param do_tcp: if tcp should be used.
1115 * @param hints: for getaddrinfo. family and flags have to be set by caller.
1116 * @param port: Port number to use (as string).
1117 * @param list: list of open ports, appended to, changed to point to list head.
1118 * @param rcv: receive buffer size for UDP
1119 * @param snd: send buffer size for UDP
1120 * @param ssl_port: ssl service port number
1121 * @param tls_additional_port: list of additional ssl service port numbers.
1122 * @param https_port: DoH service port number
1123 * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
1124 * set to false on exit if reuseport failed due to no kernel support.
1125 * @param transparent: set IP_TRANSPARENT socket option.
1126 * @param tcp_mss: maximum segment size of tcp socket. default if zero.
1127 * @param freebind: set IP_FREEBIND socket option.
1128 * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection
1129 * @param use_systemd: if true, fetch sockets from systemd.
1130 * @param dnscrypt_port: dnscrypt service port number
1131 * @param dscp: DSCP to use.
1132 * @return: returns false on error.
1135 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
1136 struct addrinfo *hints, const char* port, struct listen_port** list,
1137 size_t rcv, size_t snd, int ssl_port,
1138 struct config_strlist* tls_additional_port, int https_port,
1139 int* reuseport, int transparent, int tcp_mss, int freebind,
1140 int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp)
1143 int is_https = if_is_https(ifname, port, https_port);
1144 int nodelay = is_https && http2_nodelay;
1146 int is_dnscrypt = ((strchr(ifname, '@') &&
1147 atoi(strchr(ifname, '@')+1) == dnscrypt_port) ||
1148 (!strchr(ifname, '@') && atoi(port) == dnscrypt_port));
1150 int is_dnscrypt = 0;
1151 (void)dnscrypt_port;
1154 if(!do_udp && !do_tcp)
1157 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1158 &noip6, rcv, snd, reuseport, transparent,
1159 tcp_mss, nodelay, freebind, use_systemd, dscp)) == -1) {
1161 log_warn("IPv6 protocol not available");
1166 /* getting source addr packet info is highly non-portable */
1167 if(!set_recvpktinfo(s, hints->ai_family)) {
1171 if(!port_insert(list, s,
1172 is_dnscrypt?listen_type_udpancil_dnscrypt:listen_type_udpancil)) {
1177 /* regular udp socket */
1178 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
1179 &noip6, rcv, snd, reuseport, transparent,
1180 tcp_mss, nodelay, freebind, use_systemd, dscp)) == -1) {
1182 log_warn("IPv6 protocol not available");
1187 if(!port_insert(list, s,
1188 is_dnscrypt?listen_type_udp_dnscrypt:listen_type_udp)) {
1194 int is_ssl = if_is_ssl(ifname, port, ssl_port,
1195 tls_additional_port);
1196 enum listen_type port_type;
1198 port_type = listen_type_ssl;
1200 port_type = listen_type_http;
1201 else if(is_dnscrypt)
1202 port_type = listen_type_tcp_dnscrypt;
1204 port_type = listen_type_tcp;
1205 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
1206 &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
1207 freebind, use_systemd, dscp)) == -1) {
1209 /*log_warn("IPv6 protocol not available");*/
1215 verbose(VERB_ALGO, "setup TCP for SSL service");
1216 if(!port_insert(list, s, port_type)) {
1225 * Add items to commpoint list in front.
1226 * @param c: commpoint to add.
1227 * @param front: listen struct.
1228 * @return: false on failure.
1231 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
1233 struct listen_list* item = (struct listen_list*)malloc(
1234 sizeof(struct listen_list));
1238 item->next = front->cps;
1243 struct listen_dnsport*
1244 listen_create(struct comm_base* base, struct listen_port* ports,
1245 size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
1246 int harden_large_queries, uint32_t http_max_streams,
1247 char* http_endpoint, struct tcl_list* tcp_conn_limit, void* sslctx,
1248 struct dt_env* dtenv, comm_point_callback_type* cb, void *cb_arg)
1250 struct listen_dnsport* front = (struct listen_dnsport*)
1251 malloc(sizeof(struct listen_dnsport));
1255 front->udp_buff = sldns_buffer_new(bufsize);
1257 front->dnscrypt_udp_buff = NULL;
1259 if(!front->udp_buff) {
1263 if(!stream_wait_lock_inited) {
1264 lock_basic_init(&stream_wait_count_lock);
1265 stream_wait_lock_inited = 1;
1267 if(!http2_query_buffer_lock_inited) {
1268 lock_basic_init(&http2_query_buffer_count_lock);
1269 http2_query_buffer_lock_inited = 1;
1271 if(!http2_response_buffer_lock_inited) {
1272 lock_basic_init(&http2_response_buffer_count_lock);
1273 http2_response_buffer_lock_inited = 1;
1276 /* create comm points as needed */
1278 struct comm_point* cp = NULL;
1279 if(ports->ftype == listen_type_udp ||
1280 ports->ftype == listen_type_udp_dnscrypt)
1281 cp = comm_point_create_udp(base, ports->fd,
1282 front->udp_buff, cb, cb_arg);
1283 else if(ports->ftype == listen_type_tcp ||
1284 ports->ftype == listen_type_tcp_dnscrypt)
1285 cp = comm_point_create_tcp(base, ports->fd,
1286 tcp_accept_count, tcp_idle_timeout,
1287 harden_large_queries, 0, NULL,
1288 tcp_conn_limit, bufsize, front->udp_buff,
1289 ports->ftype, cb, cb_arg);
1290 else if(ports->ftype == listen_type_ssl ||
1291 ports->ftype == listen_type_http) {
1292 cp = comm_point_create_tcp(base, ports->fd,
1293 tcp_accept_count, tcp_idle_timeout,
1294 harden_large_queries,
1295 http_max_streams, http_endpoint,
1296 tcp_conn_limit, bufsize, front->udp_buff,
1297 ports->ftype, cb, cb_arg);
1299 if(ports->ftype == listen_type_http) {
1301 log_warn("HTTPS port configured, but no TLS "
1302 "tls-service-key or tls-service-pem "
1305 #ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
1306 log_warn("Unbound is not compiled with an "
1307 "OpenSSL version supporting ALPN "
1308 " (OpenSSL >= 1.0.2). This is required "
1309 "to use DNS-over-HTTPS");
1311 #ifndef HAVE_NGHTTP2_NGHTTP2_H
1312 log_warn("Unbound is not compiled with "
1313 "nghttp2. This is required to use "
1317 } else if(ports->ftype == listen_type_udpancil ||
1318 ports->ftype == listen_type_udpancil_dnscrypt)
1319 cp = comm_point_create_udp_ancil(base, ports->fd,
1320 front->udp_buff, cb, cb_arg);
1322 log_err("can't create commpoint");
1323 listen_delete(front);
1327 cp->do_not_close = 1;
1329 if (ports->ftype == listen_type_udp_dnscrypt ||
1330 ports->ftype == listen_type_tcp_dnscrypt ||
1331 ports->ftype == listen_type_udpancil_dnscrypt) {
1333 cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
1334 if(!cp->dnscrypt_buffer) {
1335 log_err("can't alloc dnscrypt_buffer");
1336 comm_point_delete(cp);
1337 listen_delete(front);
1340 front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
1343 if(!listen_cp_insert(cp, front)) {
1344 log_err("malloc failed");
1345 comm_point_delete(cp);
1346 listen_delete(front);
1349 ports = ports->next;
1352 log_err("Could not open sockets to accept queries.");
1353 listen_delete(front);
/** Release the comm points held on a listen_list. p starts at the head
 * of list and comm_point_delete() is called on the comm point (p->com)
 * of an entry. */
1361 listen_list_delete(struct listen_list* list)
1363 struct listen_list *p = list, *pn;
1366 comm_point_delete(p->com);
/** Release a listen_dnsport: the list of comm points in front->cps, the
 * packet buffers, and the file-level counter locks. */
1373 listen_delete(struct listen_dnsport* front)
1377 listen_list_delete(front->cps);
/* the dnscrypt buffer is freed on its own only when it is a separate
 * buffer; when it is the same object as udp_buff the free below covers it */
1379 if(front->dnscrypt_udp_buff &&
1380 front->udp_buff != front->dnscrypt_udp_buff) {
1381 sldns_buffer_free(front->dnscrypt_udp_buff);
1384 sldns_buffer_free(front->udp_buff);
/* each lock is destroyed only if its inited flag is set, and the flag is
 * cleared before the destroy */
1386 if(stream_wait_lock_inited) {
1387 stream_wait_lock_inited = 0;
1388 lock_basic_destroy(&stream_wait_count_lock);
1390 if(http2_query_buffer_lock_inited) {
1391 http2_query_buffer_lock_inited = 0;
1392 lock_basic_destroy(&http2_query_buffer_count_lock);
1394 if(http2_response_buffer_lock_inited) {
1395 http2_response_buffer_lock_inited = 0;
1396 lock_basic_destroy(&http2_response_buffer_count_lock);
1400 #ifdef HAVE_GETIFADDRS
/**
 * Expand one configured interface string into listen address strings.
 * Entries of ifas whose ifa_name equals search_ifa are used; when
 * search_ifa contains an '@', only the part before the last '@' is
 * compared with the interface name. The AF_INET or AF_INET6 address of a
 * matching entry is printed with inet_ntop into addr_buf and a strdup'd
 * copy is appended to *ip_addresses, which grows by one slot per address.
 * For IPv6 a scope name found by if_indextoname is added after a '%'.
 * If nothing was appended for search_ifa, a copy of search_ifa itself is
 * appended, so a string that names no interface is kept as given.
 * @param ifas: interface address list to search.
 * @param search_ifa: configured interface string.
 * @param ip_addresses: in/out array of allocated strings.
 * @param ip_addresses_size: in/out number of entries in the array.
 * The caller in this file treats a zero return as failure.
 */
1402 resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size)
1404 struct ifaddrs *ifa;
/* remembered so the end of the routine can tell if anything was added */
1405 int last_ip_addresses_size = *ip_addresses_size;
1407 for(ifa = ifas; ifa != NULL; ifa = ifa->ifa_next) {
1410 #ifdef INET6 /* | address ip | % | ifa name | @ | port | nul */
1411 char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1];
1413 char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1];
/* with an '@' present, the name must match the text before it exactly:
 * same length and same bytes */
1416 if((atsign=strrchr(search_ifa, '@')) != NULL) {
1417 if(strlen(ifa->ifa_name) != (size_t)(atsign-search_ifa)
1418 || strncmp(ifa->ifa_name, search_ifa,
1419 atsign-search_ifa) != 0)
1422 if(strcmp(ifa->ifa_name, search_ifa) != 0)
1427 if(ifa->ifa_addr == NULL)
1430 family = ifa->ifa_addr->sa_family;
1431 if(family == AF_INET) {
1432 char a4[INET_ADDRSTRLEN + 1];
1433 struct sockaddr_in *in4 = (struct sockaddr_in *)
1435 if(!inet_ntop(family, &in4->sin_addr, a4, sizeof(a4))) {
1436 log_err("inet_ntop failed");
1439 snprintf(addr_buf, sizeof(addr_buf), "%s%s",
1443 else if(family == AF_INET6) {
1444 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)
1446 char a6[INET6_ADDRSTRLEN + 1];
1447 char if_index_name[IF_NAMESIZE + 1];
/* start empty, so a failed if_indextoname leaves a zero length name */
1448 if_index_name[0] = 0;
1449 if(!inet_ntop(family, &in6->sin6_addr, a6, sizeof(a6))) {
1450 log_err("inet_ntop failed");
1453 if_indextoname(in6->sin6_scope_id,
1454 (char *)if_index_name);
/* scoped address: "addr%scopename" followed by the atsign text */
1455 if (strlen(if_index_name) != 0) {
1456 snprintf(addr_buf, sizeof(addr_buf),
1457 "%s%%%s%s", a6, if_index_name, atsign);
1459 snprintf(addr_buf, sizeof(addr_buf), "%s%s",
1467 verbose(4, "interface %s has address %s", search_ifa, addr_buf);
/* NOTE(review): the realloc result is stored straight into *ip_addresses,
 * so a NULL return overwrites the only pointer to the previous array */
1469 *ip_addresses = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1));
1470 if(!*ip_addresses) {
1471 log_err("realloc failed: out of memory");
1474 (*ip_addresses)[*ip_addresses_size] = strdup(addr_buf);
1475 if(!(*ip_addresses)[*ip_addresses_size]) {
1476 log_err("strdup failed: out of memory");
1479 (*ip_addresses_size)++;
/* nothing was added for this string: keep the string itself */
1482 if (*ip_addresses_size == last_ip_addresses_size) {
/* NOTE(review): same direct realloc assignment as above */
1483 *ip_addresses = realloc(*ip_addresses, sizeof(char *) * (*ip_addresses_size + 1));
1484 if(!*ip_addresses) {
1485 log_err("realloc failed: out of memory");
1488 (*ip_addresses)[*ip_addresses_size] = strdup(search_ifa);
1489 if(!(*ip_addresses)[*ip_addresses_size]) {
1490 log_err("strdup failed: out of memory");
1493 (*ip_addresses_size)++;
1497 #endif /* HAVE_GETIFADDRS */
/**
 * Build the array of interface strings to listen on from cfg->ifs.
 * With HAVE_GETIFADDRS every configured entry goes through
 * resolve_ifa_name() using the list from getifaddrs(), so the result can
 * hold more entries than cfg->num_ifs. Without it the configured strings
 * are copied one by one with strdup into a calloc'd array.
 * The array is stored in *resif and its length in *num_resif; when an
 * entry fails, what was built so far is released with
 * config_del_strarray().
 */
1499 int resolve_interface_names(struct config_file* cfg, char*** resif,
1502 #ifdef HAVE_GETIFADDRS
1504 struct ifaddrs *addrs = NULL;
1505 if(cfg->num_ifs == 0) {
1510 if(getifaddrs(&addrs) == -1) {
1511 log_err("failed to list interfaces: getifaddrs: %s",
1516 for(i=0; i<cfg->num_ifs; i++) {
1517 if(!resolve_ifa_name(addrs, cfg->ifs[i], resif, num_resif)) {
1519 config_del_strarray(*resif, *num_resif);
1529 if(cfg->num_ifs == 0) {
/* one output slot per configured interface string */
1534 *num_resif = cfg->num_ifs;
1535 *resif = calloc(*num_resif, sizeof(**resif));
1537 log_err("out of memory");
1540 for(i=0; i<*num_resif; i++) {
1541 (*resif)[i] = strdup(cfg->ifs[i]);
1542 if(!((*resif)[i])) {
1543 log_err("out of memory");
1544 config_del_strarray(*resif, *num_resif);
1551 #endif /* HAVE_GETIFADDRS */
/**
 * Create the list of listening ports for the configuration.
 * The port number is printed into portbuf and getaddrinfo hints are
 * prepared (AI_PASSIVE, AI_NUMERICHOST). Sockets are then created through
 * ports_create_if(), which adds them to the local list:
 *  - with automatic interfaces or no interfaces given: one IPv6 and one
 *    IPv4 call, on the wildcard address ("::0", "0.0.0.0") when do_auto
 *    is set and on loopback ("::1", "127.0.0.1") otherwise;
 *  - otherwise one call per entry of ifs, with hints.ai_family set to
 *    AF_INET6 or AF_INET around the str_is_ip6() test.
 * If a ports_create_if() call fails, the list built so far is released
 * with listening_ports_free().
 */
1555 listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
1558 struct listen_port* list = NULL;
1559 struct addrinfo hints;
1560 int i, do_ip4, do_ip6;
1561 int do_tcp, do_auto;
1563 snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1564 do_ip4 = cfg->do_ip4;
1565 do_ip6 = cfg->do_ip6;
1566 do_tcp = cfg->do_tcp;
/* automatic interface mode is only in effect when UDP is enabled */
1567 do_auto = cfg->if_automatic && cfg->do_udp;
1568 if(cfg->incoming_num_tcp == 0)
1572 memset(&hints, 0, sizeof(hints));
1573 hints.ai_flags = AI_PASSIVE;
1574 /* no name lookups on our listening ports */
1576 hints.ai_flags |= AI_NUMERICHOST;
1577 hints.ai_family = AF_UNSPEC;
1581 if(!do_ip4 && !do_ip6) {
1584 /* create ip4 and ip6 ports so that return addresses are nice. */
1585 if(do_auto || num_ifs == 0) {
1587 hints.ai_family = AF_INET6;
1588 if(!ports_create_if(do_auto?"::0":"::1",
1589 do_auto, cfg->do_udp, do_tcp,
1590 &hints, portbuf, &list,
1591 cfg->so_rcvbuf, cfg->so_sndbuf,
1592 cfg->ssl_port, cfg->tls_additional_port,
1593 cfg->https_port, reuseport, cfg->ip_transparent,
1594 cfg->tcp_mss, cfg->ip_freebind,
1595 cfg->http_nodelay, cfg->use_systemd,
1596 cfg->dnscrypt_port, cfg->ip_dscp)) {
1597 listening_ports_free(list);
1602 hints.ai_family = AF_INET;
1603 if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1",
1604 do_auto, cfg->do_udp, do_tcp,
1605 &hints, portbuf, &list,
1606 cfg->so_rcvbuf, cfg->so_sndbuf,
1607 cfg->ssl_port, cfg->tls_additional_port,
1608 cfg->https_port, reuseport, cfg->ip_transparent,
1609 cfg->tcp_mss, cfg->ip_freebind,
1610 cfg->http_nodelay, cfg->use_systemd,
1611 cfg->dnscrypt_port, cfg->ip_dscp)) {
1612 listening_ports_free(list);
/* explicit interfaces: the automatic flag passed to ports_create_if is 0 */
1616 } else for(i = 0; i<num_ifs; i++) {
1617 if(str_is_ip6(ifs[i])) {
1620 hints.ai_family = AF_INET6;
1621 if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1622 do_tcp, &hints, portbuf, &list,
1623 cfg->so_rcvbuf, cfg->so_sndbuf,
1624 cfg->ssl_port, cfg->tls_additional_port,
1625 cfg->https_port, reuseport, cfg->ip_transparent,
1626 cfg->tcp_mss, cfg->ip_freebind,
1627 cfg->http_nodelay, cfg->use_systemd,
1628 cfg->dnscrypt_port, cfg->ip_dscp)) {
1629 listening_ports_free(list);
1635 hints.ai_family = AF_INET;
1636 if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1637 do_tcp, &hints, portbuf, &list,
1638 cfg->so_rcvbuf, cfg->so_sndbuf,
1639 cfg->ssl_port, cfg->tls_additional_port,
1640 cfg->https_port, reuseport, cfg->ip_transparent,
1641 cfg->tcp_mss, cfg->ip_freebind,
1642 cfg->http_nodelay, cfg->use_systemd,
1643 cfg->dnscrypt_port, cfg->ip_dscp)) {
1644 listening_ports_free(list);
/** Release a list of listen_port entries. An entry whose fd is not -1 has
 * its socket closed with sock_close(). */
1652 void listening_ports_free(struct listen_port* list)
1654 struct listen_port* nx;
1657 if(list->fd != -1) {
1658 sock_close(list->fd);
/** Add up the memory in use by the listen structure: the structure
 * itself, the base, the udp buffer with its capacity, the dnscrypt buffer,
 * and comm_point_get_mem() of every comm point in listen->cps. */
1665 size_t listen_get_mem(struct listen_dnsport* listen)
1667 struct listen_list* p;
1668 size_t s = sizeof(*listen) + sizeof(*listen->base) +
1669 sizeof(*listen->udp_buff) +
1670 sldns_buffer_capacity(listen->udp_buff);
1672 s += sizeof(*listen->dnscrypt_udp_buff);
/* the capacity is counted only when the dnscrypt buffer is a separate
 * buffer, so shared storage is not counted twice */
1673 if(listen->udp_buff != listen->dnscrypt_udp_buff){
1674 s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
1677 for(p = listen->cps; p; p = p->next) {
1679 s += comm_point_get_mem(p->com);
/** Stop listening on the TCP accept comm points of listen->cps. Only
 * comm points of type comm_tcp_accept with a non-NULL tcp_free list are
 * stopped with comm_point_stop_listening(). */
1684 void listen_stop_accept(struct listen_dnsport* listen)
1686 /* do not stop the ones that have no tcp_free list
1687 * (they have already stopped listening) */
1688 struct listen_list* p;
1689 for(p=listen->cps; p; p=p->next) {
1690 if(p->com->type == comm_tcp_accept &&
1691 p->com->tcp_free != NULL) {
1692 comm_point_stop_listening(p->com);
/** Start listening again on the TCP accept comm points of listen->cps.
 * Only comm points of type comm_tcp_accept with a non-NULL tcp_free list
 * are started; comm_point_start_listening() is called with -1 for both
 * of its remaining arguments. */
1697 void listen_start_accept(struct listen_dnsport* listen)
1699 /* do not start the ones that have no tcp_free list, it is no
1700 * use to listen to them because they have no free tcp handlers */
1701 struct listen_list* p;
1702 for(p=listen->cps; p; p=p->next) {
1703 if(p->com->type == comm_tcp_accept &&
1704 p->com->tcp_free != NULL) {
1705 comm_point_start_listening(p->com, -1, -1);
/** Allocate a tcp_req_info, zero it and store spoolbuf as its
 * spool_buffer. A malloc failure is logged.
 * @param spoolbuf: buffer to reference; it is not copied. */
1710 struct tcp_req_info*
1711 tcp_req_info_create(struct sldns_buffer* spoolbuf)
1713 struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
1715 log_err("malloc failure for new stream outoforder processing structure");
1718 memset(req, 0, sizeof(*req));
1719 req->spool_buffer = spoolbuf;
/** Delete a tcp_req_info. The open and done lists are released first with
 * tcp_req_info_clear(). */
1724 tcp_req_info_delete(struct tcp_req_info* req)
1727 tcp_req_info_clear(req);
1728 /* cp is pointer back to commpoint that owns this struct and
1729 * called delete on us */
1730 /* spool_buffer is shared udp buffer, not deleted here */
/** Empty a tcp_req_info: the mesh reply entries of the open requests are
 * removed, the pending result items are dropped with their size
 * subtracted from stream_wait_count, and the list heads, counters and
 * read_is_closed are reset to zero. */
1734 void tcp_req_info_clear(struct tcp_req_info* req)
1736 struct tcp_req_open_item* open, *nopen;
1737 struct tcp_req_done_item* item, *nitem;
1740 /* free outstanding request mesh reply entries */
1741 open = req->open_req_list;
1744 mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
1748 req->open_req_list = NULL;
1749 req->num_open_req = 0;
1751 /* free pending writable result packets */
1752 item = req->done_req_list;
/* stream_wait_count is changed only with stream_wait_count_lock held */
1755 lock_basic_lock(&stream_wait_count_lock);
1756 stream_wait_count -= (sizeof(struct tcp_req_done_item)
1758 lock_basic_unlock(&stream_wait_count_lock);
1763 req->done_req_list = NULL;
1764 req->num_done_req = 0;
1765 req->read_is_closed = 0;
/** Unlink the open request item that refers to mesh state m from
 * req->open_req_list and decrement num_open_req. Nothing is done when req
 * or m is NULL. */
1769 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
1771 struct tcp_req_open_item* open, *prev = NULL;
1772 if(!req || !m) return;
1773 open = req->open_req_list;
1775 if(open->mesh_state == m) {
1776 struct tcp_req_open_item* next;
/* splice out: relink the previous item, or the list head when the
 * match is the first item */
1777 if(prev) prev->next = open->next;
1778 else req->open_req_list = open->next;
1779 /* caller has to manage the mesh state reply entry */
1782 req->num_open_req --;
1793 /** setup listening for read or write.
 * Nothing is changed while a message is partly transferred
 * (tcp_byte_count != 0). Reading more requests is only considered while
 * the open plus done requests stay below TCP_MAX_REQ_SIMULTANEOUS and the
 * read side is not closed. Every change of direction re-arms the comm
 * point with comm_point_stop_listening() followed by
 * comm_point_start_listening() using tcp_timeout_msec. */
1795 tcp_req_info_setup_listen(struct tcp_req_info* req)
1800 if(req->cp->tcp_byte_count != 0) {
1801 /* cannot change, halfway through */
1805 if(!req->cp->tcp_is_reading)
1807 if(req->num_open_req + req->num_done_req < TCP_MAX_REQ_SIMULTANEOUS &&
1808 !req->read_is_closed)
1812 req->cp->tcp_is_reading = 0;
1813 comm_point_stop_listening(req->cp);
1814 comm_point_start_listening(req->cp, -1,
1815 req->cp->tcp_timeout_msec);
1817 req->cp->tcp_is_reading = 1;
1818 comm_point_stop_listening(req->cp);
1819 comm_point_start_listening(req->cp, -1,
1820 req->cp->tcp_timeout_msec);
1821 /* and also read it (from SSL stack buffers), so
1822 * no event read event is expected since the remainder of
1823 * the TLS frame is sitting in the buffers. */
1824 req->read_again = 1;
1826 comm_point_stop_listening(req->cp);
1827 comm_point_start_listening(req->cp, -1,
1828 req->cp->tcp_timeout_msec);
1829 comm_point_listen_for_rw(req->cp, 0, 0);
1833 /** remove first item from list of pending results.
 * The list must not be empty (asserted). The size of the item, structure
 * plus item->len, is subtracted from stream_wait_count with
 * stream_wait_count_lock held, the list head moves to the next item and
 * num_done_req is decremented. */
1834 static struct tcp_req_done_item*
1835 tcp_req_info_pop_done(struct tcp_req_info* req)
1837 struct tcp_req_done_item* item;
1838 log_assert(req->num_done_req > 0 && req->done_req_list);
1839 item = req->done_req_list;
1840 lock_basic_lock(&stream_wait_count_lock);
1841 stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
1842 lock_basic_unlock(&stream_wait_count_lock);
1843 req->done_req_list = req->done_req_list->next;
1844 req->num_done_req --;
1848 /** Send given buffer and setup to write.
 * buf is copied into req->cp->buffer (clear, write, flip) and the comm
 * point is marked as writing by clearing tcp_is_reading. */
1850 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
1853 sldns_buffer_clear(req->cp->buffer);
1854 sldns_buffer_write(req->cp->buffer, buf, len);
1855 sldns_buffer_flip(req->cp->buffer);
1857 req->cp->tcp_is_reading = 0; /* we are now writing */
1860 /** pick up the next result and start writing it to the channel.
 * Only acts when num_done_req is above zero: the first pending item is
 * popped and its bytes are handed to tcp_req_info_start_write_buf(). */
1862 tcp_req_pickup_next_result(struct tcp_req_info* req)
1864 if(req->num_done_req > 0) {
1865 /* unlist the done item from the list of pending results */
1866 struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
1867 tcp_req_info_start_write_buf(req, item->buf, item->len);
1873 /** the read channel has closed.
 * The partial read count is reset. With results pending, the next one is
 * picked up and listening is set up again. When requests are still open
 * but nothing is pending, read_is_closed is set and listening is set up
 * to wait for them. */
1875 tcp_req_info_handle_read_close(struct tcp_req_info* req)
1877 verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
1878 /* reset byte count for (potential) partial read */
1879 req->cp->tcp_byte_count = 0;
1880 /* if we still have results to write, pick up next and write it */
1881 if(req->num_done_req != 0) {
1882 tcp_req_pickup_next_result(req);
1883 tcp_req_info_setup_listen(req);
1886 /* if nothing to do, this closes the connection */
1887 if(req->num_open_req == 0 && req->num_done_req == 0)
1889 /* otherwise, we must be waiting for dns resolve, wait with timeout */
1890 req->read_is_closed = 1;
1891 tcp_req_info_setup_listen(req);
/** A write on the stream has completed. The comm point buffer is cleared.
 * If no results remain and the read side is closed the reply is dropped
 * with comm_point_drop_reply(); otherwise the comm point goes back to
 * reading, a further pending result is picked up if there is one, and
 * listening is set up again. */
1896 tcp_req_info_handle_writedone(struct tcp_req_info* req)
1898 /* back to reading state, we finished this write event */
1899 sldns_buffer_clear(req->cp->buffer);
1900 if(req->num_done_req == 0 && req->read_is_closed) {
1901 /* no more to write and nothing to read, close it */
1902 comm_point_drop_reply(&req->cp->repinfo);
1905 req->cp->tcp_is_reading = 1;
1906 /* see if another result needs writing */
1907 tcp_req_pickup_next_result(req);
1909 /* see if there is more to write, if not stop_listening for writing */
1910 /* see if new requests are allowed, if so, start_listening
1912 tcp_req_info_setup_listen(req);
1916 tcp_req_info_handle_readdone(struct tcp_req_info* req)
1918 struct comm_point* c = req->cp;
1920 /* we want to read up several requests, unless there are
1921 * pending answers */
/* A request has been read completely. The comm point callback is run
 * with in_worker_handle set for the duration of the call. A nonzero
 * return means the answer is in c->buffer and the comm point is switched
 * to writing; otherwise the buffer is cleared, a pending result is picked
 * up if there is one, and listening is set up again. */
1925 req->in_worker_handle = 1;
1926 sldns_buffer_set_limit(req->spool_buffer, 0);
1927 /* handle the current request */
1928 /* this calls the worker handle request routine that could give
1929 * a cache response, or localdata response, or drop the reply,
1930 * or schedule a mesh entry for later */
1931 fptr_ok(fptr_whitelist_comm_point(c->callback));
1932 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
1933 req->in_worker_handle = 0;
1934 /* there is an answer, put it up. It is already in the
1935 * c->buffer, just send it. */
1936 /* since we were just reading a query, the channel is
1937 * clear to write to */
1939 c->tcp_is_reading = 0;
1940 comm_point_stop_listening(c);
1941 comm_point_start_listening(c, -1, c->tcp_timeout_msec);
1944 req->in_worker_handle = 0;
1945 /* it should be waiting in the mesh for recursion.
1946 * If mesh failed to add a new entry and called commpoint_drop_reply.
1947 * Then the mesh state has been cleared. */
1949 /* the reply has been dropped, stream has been closed. */
1952 /* If mesh failed(mallocfail) and called commpoint_send_reply with
1953 * something like servfail then we pick up that reply below. */
1958 sldns_buffer_clear(c->buffer);
1959 /* if pending answers, pick up an answer and start sending it */
1960 tcp_req_pickup_next_result(req);
1962 /* if answers pending, start sending answers */
1963 /* read more requests if we can have more requests */
1964 tcp_req_info_setup_listen(req);
/** Record that mesh state m works on a request of this stream. A new
 * tcp_req_open_item is allocated and put at the front of
 * req->open_req_list, and num_open_req is incremented. All three
 * arguments must be non-NULL (asserted). */
1968 tcp_req_info_add_meshstate(struct tcp_req_info* req,
1969 struct mesh_area* mesh, struct mesh_state* m)
1971 struct tcp_req_open_item* item;
1972 log_assert(req && mesh && m);
1973 item = (struct tcp_req_open_item*)malloc(sizeof(*item));
1975 item->next = req->open_req_list;
1977 item->mesh_state = m;
1978 req->open_req_list = item;
1979 req->num_open_req++;
1983 /** Add a result to the result list. At the end.
 * The size of the new item, structure plus len, is first reserved in
 * stream_wait_count with stream_wait_count_lock held; the reply is
 * dropped when the total would exceed stream_wait_max. The item holds
 * its own copy of buf, made with memdup. */
1985 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len)
1987 struct tcp_req_done_item* last = NULL;
1988 struct tcp_req_done_item* item;
1991 /* see if we have space */
1992 space = sizeof(struct tcp_req_done_item) + len;
1993 lock_basic_lock(&stream_wait_count_lock);
1994 if(stream_wait_count + space > stream_wait_max) {
1995 lock_basic_unlock(&stream_wait_count_lock);
1996 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size");
1999 stream_wait_count += space;
2000 lock_basic_unlock(&stream_wait_count_lock);
2002 /* find last element */
2003 last = req->done_req_list;
2004 while(last && last->next)
2007 /* create new element */
2008 item = (struct tcp_req_done_item*)malloc(sizeof(*item));
2010 log_err("malloc failure, for stream result list");
2015 item->buf = memdup(buf, len);
2018 log_err("malloc failure, adding reply to stream result list");
/* append after the last item, or start the list when it was empty */
2023 if(last) last->next = item;
2024 else req->done_req_list = item;
2025 req->num_done_req++;
/** Send the reply that is in req->spool_buffer over the stream.
 * Three cases are visible: inside the worker callback
 * (in_worker_handle) the reply is copied into the comm point buffer;
 * when the comm point is reading with no bytes of a message transferred,
 * the reply is written straight away; otherwise it is queued with
 * tcp_req_info_add_result(), and if that fails the connection is dropped
 * with comm_point_drop_reply(). */
2030 tcp_req_info_send_reply(struct tcp_req_info* req)
2032 if(req->in_worker_handle) {
2033 /* reply from mesh is in the spool_buffer */
2034 /* copy now, so that the spool buffer is free for other tasks
2035 * before the callback is done */
2036 sldns_buffer_clear(req->cp->buffer);
2037 sldns_buffer_write(req->cp->buffer,
2038 sldns_buffer_begin(req->spool_buffer),
2039 sldns_buffer_limit(req->spool_buffer));
2040 sldns_buffer_flip(req->cp->buffer);
2044 /* now that the query has been handled, that mesh_reply entry
2045 * should be removed, from the tcp_req_info list,
2046 * the mesh state cleanup removes then with region_cleanup and
2047 * replies_sent true. */
2048 /* see if we can send it straight away (we are not doing
2049 * anything else). If so, copy to buffer and start */
2050 if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) {
2051 /* buffer is free, and was ready to read new query into,
2052 * but we are now going to use it to send this answer */
2053 tcp_req_info_start_write_buf(req,
2054 sldns_buffer_begin(req->spool_buffer),
2055 sldns_buffer_limit(req->spool_buffer));
2056 /* switch to listen to write events */
2057 comm_point_stop_listening(req->cp);
2058 comm_point_start_listening(req->cp, -1,
2059 req->cp->tcp_timeout_msec);
2062 /* queue up the answer behind the others already pending */
2063 if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer),
2064 sldns_buffer_limit(req->spool_buffer))) {
2065 /* drop the connection, we are out of resources */
2066 comm_point_drop_reply(&req->cp->repinfo);
/** Read stream_wait_count. The value is read under
 * stream_wait_count_lock, except when the lock has not been initialised,
 * in which case it is returned without locking. */
2070 size_t tcp_req_info_get_stream_buffer_size(void)
2073 if(!stream_wait_lock_inited)
2074 return stream_wait_count;
2075 lock_basic_lock(&stream_wait_count_lock);
2076 s = stream_wait_count;
2077 lock_basic_unlock(&stream_wait_count_lock);
/** Read http2_query_buffer_count. The value is read under
 * http2_query_buffer_count_lock, except when the lock has not been
 * initialised, in which case it is returned without locking. */
2081 size_t http2_get_query_buffer_size(void)
2084 if(!http2_query_buffer_lock_inited)
2085 return http2_query_buffer_count;
2086 lock_basic_lock(&http2_query_buffer_count_lock);
2087 s = http2_query_buffer_count;
2088 lock_basic_unlock(&http2_query_buffer_count_lock);
/** Read http2_response_buffer_count. The value is read under
 * http2_response_buffer_count_lock, except when the lock has not been
 * initialised, in which case it is returned without locking. */
2092 size_t http2_get_response_buffer_size(void)
2095 if(!http2_response_buffer_lock_inited)
2096 return http2_response_buffer_count;
2097 lock_basic_lock(&http2_response_buffer_count_lock);
2098 s = http2_response_buffer_count;
2099 lock_basic_unlock(&http2_response_buffer_count_lock);
2104 /** nghttp2 callback. Used to copy response from rbuffer to nghttp2 session.
 * The stream is looked up from the session user data for stream_id. At
 * most length bytes, and no more than what remains in rbuffer, are copied
 * into buf. Once rbuffer is drained NGHTTP2_DATA_FLAG_EOF is set, the
 * buffer capacity is subtracted from http2_response_buffer_count under
 * its lock, and rbuffer is freed and set to NULL.
 * NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE is returned when the stream data
 * cannot be found or rbuffer has no data. */
2105 static ssize_t http2_submit_response_read_callback(
2106 nghttp2_session* ATTR_UNUSED(session),
2107 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2108 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2110 struct http2_stream* h2_stream;
2111 struct http2_session* h2_session = source->ptr;
2112 size_t copylen = length;
2113 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2114 h2_session->session, stream_id))) {
2115 verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2117 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2119 if(!h2_stream->rbuffer ||
2120 sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2121 verbose(VERB_QUERY, "http2: cannot submit buffer. No data "
2122 "available in rbuffer");
2123 /* rbuffer will be free'd in frame close cb */
2124 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
/* clamp the copy to the data left in rbuffer and to SSIZE_MAX */
2127 if(copylen > sldns_buffer_remaining(h2_stream->rbuffer))
2128 copylen = sldns_buffer_remaining(h2_stream->rbuffer);
2129 if(copylen > SSIZE_MAX)
2130 copylen = SSIZE_MAX; /* will probably never happen */
2132 memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen);
2133 sldns_buffer_skip(h2_stream->rbuffer, copylen);
2135 if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2136 *data_flags |= NGHTTP2_DATA_FLAG_EOF;
2137 lock_basic_lock(&http2_response_buffer_count_lock);
2138 http2_response_buffer_count -=
2139 sldns_buffer_capacity(h2_stream->rbuffer);
2140 lock_basic_unlock(&http2_response_buffer_count_lock);
2141 sldns_buffer_free(h2_stream->rbuffer);
2142 h2_stream->rbuffer = NULL;
/* The RST_STREAM frame is submitted with NGHTTP2_FLAG_NONE and error code
 * NGHTTP2_INTERNAL_ERROR for h2_stream->stream_id; a failure is logged at
 * VERB_QUERY with the nghttp2_strerror() text. */
2149 * Send RST_STREAM frame for stream.
2150 * @param h2_session: http2 session to submit frame to
2151 * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM
2152 * @return 0 on error, 1 otherwise
2154 static int http2_submit_rst_stream(struct http2_session* h2_session,
2155 struct http2_stream* h2_stream)
2157 int ret = nghttp2_submit_rst_stream(h2_session->session,
2158 NGHTTP2_FLAG_NONE, h2_stream->stream_id,
2159 NGHTTP2_INTERNAL_ERROR);
2161 verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, "
2162 "error: %s", nghttp2_strerror(ret));
/* Summary of the visible steps: the stream must not already have an
 * rbuffer and c->buffer must hold data; the status is printed as exactly
 * three digits; the response length is reserved in
 * http2_response_buffer_count (the stream is reset when that would pass
 * http2_response_buffer_max); the answer is copied to a new rbuffer; and
 * the response is submitted with two headers, ":status" and
 * "content-type", with http2_submit_response_read_callback as the data
 * source.
 * NOTE(review): the message logged after nghttp2_submit_response()
 * mentions set_stream_user_data; it looks copied from another call site -
 * confirm the intended text. */
2169 * DNS response ready to be submitted to nghttp2, to be prepared for sending
2170 * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer
2171 * might be used before this will be sent out.
2172 * @param h2_session: http2 session, containing c->buffer which contains answer
2173 * @return 0 on error, 1 otherwise
2175 int http2_submit_dns_response(struct http2_session* h2_session)
2178 nghttp2_data_provider data_prd;
2180 nghttp2_nv headers[2];
2181 struct http2_stream* h2_stream = h2_session->c->h2_stream;
2184 if(h2_stream->rbuffer) {
2185 log_err("http2 submit response error: rbuffer already "
2189 if(sldns_buffer_remaining(h2_session->c->buffer) == 0) {
2190 log_err("http2 submit response error: c->buffer not complete");
/* the :status value is sent with valuelen 3, so anything that does not
 * print as three characters is rejected */
2194 if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2195 verbose(VERB_QUERY, "http2: submit response error: "
/* reserve the space in the shared counter before allocating; the
 * reservation is undone below if the allocation fails */
2200 rlen = sldns_buffer_remaining(h2_session->c->buffer);
2201 lock_basic_lock(&http2_response_buffer_count_lock);
2202 if(http2_response_buffer_count + rlen > http2_response_buffer_max) {
2203 lock_basic_unlock(&http2_response_buffer_count_lock);
2204 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2205 "in https-response-buffer-size");
2206 return http2_submit_rst_stream(h2_session, h2_stream);
2208 http2_response_buffer_count += rlen;
2209 lock_basic_unlock(&http2_response_buffer_count_lock);
2211 if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) {
2212 lock_basic_lock(&http2_response_buffer_count_lock);
2213 http2_response_buffer_count -= rlen;
2214 lock_basic_unlock(&http2_response_buffer_count_lock);
2215 log_err("http2 submit response error: malloc failure");
2219 headers[0].name = (uint8_t*)":status";
2220 headers[0].namelen = 7;
2221 headers[0].value = (uint8_t*)status;
2222 headers[0].valuelen = 3;
2223 headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2225 headers[1].name = (uint8_t*)"content-type";
2226 headers[1].namelen = 12;
2227 headers[1].value = (uint8_t*)"application/dns-message";
2228 headers[1].valuelen = 23;
2229 headers[1].flags = NGHTTP2_NV_FLAG_NONE;
2231 /*TODO be nice and add the content-length header
2232 headers[2].name = (uint8_t*)"content-length";
2233 headers[2].namelen = 14;
2235 headers[2].valuelen =
2236 headers[2].flags = NGHTTP2_NV_FLAG_NONE;
2239 sldns_buffer_write(h2_stream->rbuffer,
2240 sldns_buffer_current(h2_session->c->buffer),
2241 sldns_buffer_remaining(h2_session->c->buffer));
2242 sldns_buffer_flip(h2_stream->rbuffer);
2244 data_prd.source.ptr = h2_session;
2245 data_prd.read_callback = http2_submit_response_read_callback;
2246 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2247 headers, 2, &data_prd);
2249 verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2250 "error: %s", nghttp2_strerror(ret));
2256 int http2_submit_dns_response(void* ATTR_UNUSED(v))
2263 /** HTTP status to descriptive string.
 * The returned pointer is a string literal. A status without its own case
 * yields "Status Unknown". */
2264 static char* http_status_to_str(enum http_status s)
2267 case HTTP_STATUS_OK:
2269 case HTTP_STATUS_BAD_REQUEST:
2270 return "Bad Request";
2271 case HTTP_STATUS_NOT_FOUND:
2273 case HTTP_STATUS_PAYLOAD_TOO_LARGE:
2274 return "Payload Too Large";
2275 case HTTP_STATUS_URI_TOO_LONG:
2276 return "URI Too Long";
2277 case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE:
2278 return "Unsupported Media Type";
2279 case HTTP_STATUS_NOT_IMPLEMENTED:
2280 return "Not Implemented";
2282 return "Status Unknown";
2285 /** nghttp2 callback. Used to copy error message to nghttp2 session.
 * NGHTTP2_DATA_FLAG_EOF is always set, so the body is sent in one go. The
 * text comes from http_status_to_str() for the status of the stream and
 * is copied without a terminating nul. When buf is shorter than the text
 * 0 is returned, giving an empty body.
 * NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE is returned when the stream data
 * cannot be found. */
2286 static ssize_t http2_submit_error_read_callback(
2287 nghttp2_session* ATTR_UNUSED(session),
2288 int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2289 nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2291 struct http2_stream* h2_stream;
2292 struct http2_session* h2_session = source->ptr;
2294 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2295 h2_session->session, stream_id))) {
2296 verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2298 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2300 *data_flags |= NGHTTP2_DATA_FLAG_EOF;
2301 msg = http_status_to_str(h2_stream->status);
2302 if(length < strlen(msg))
2303 return 0; /* not worth trying over multiple frames */
2304 memcpy(buf, msg, strlen(msg));
/* The response carries a single ":status" header whose value is the
 * status printed as exactly three digits, and uses
 * http2_submit_error_read_callback as the data source for the body. A
 * failure of nghttp2_submit_response() is logged at VERB_QUERY. */
2310 * HTTP error response ready to be submitted to nghttp2, to be prepared for
2311 * sending out. Message body will contain descriptive string for HTTP status.
2312 * @param h2_session: http2 session to submit to
2313 * @param h2_stream: http2 stream containing HTTP status to use for error
2314 * @return 0 on error, 1 otherwise
2316 static int http2_submit_error(struct http2_session* h2_session,
2317 struct http2_stream* h2_stream)
2321 nghttp2_data_provider data_prd;
2322 nghttp2_nv headers[1]; /* will be copied by nghttp */
2323 if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2324 verbose(VERB_QUERY, "http2: submit error failed, "
2328 headers[0].name = (uint8_t*)":status";
2329 headers[0].namelen = 7;
2330 headers[0].value = (uint8_t*)status;
2331 headers[0].valuelen = 3;
2332 headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2334 data_prd.source.ptr = h2_session;
2335 data_prd.read_callback = http2_submit_error_read_callback;
2337 ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2338 headers, 1, &data_prd);
2340 verbose(VERB_QUERY, "http2: submit error failed, "
2341 "error: %s", nghttp2_strerror(ret));
/* Summary of the visible steps: the shared c->buffer must not be assigned
 * to another stream and must have room for the query; the query is copied
 * from qbuffer into c->buffer, qbuffer is released, and the comm point
 * callback is run with c->h2_stream pointing at this stream. */
2348 * Start query handling. Query is stored in the stream, and will be free'd here.
2349 * @param h2_session: http2 session, containing comm point
2350 * @param h2_stream: stream containing buffered query
2351 * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no
2352 * reply available (yet).
2354 static int http2_query_read_done(struct http2_session* h2_session,
2355 struct http2_stream* h2_stream)
2357 log_assert(h2_stream->qbuffer);
2359 if(h2_session->c->h2_stream) {
2360 verbose(VERB_ALGO, "http2_query_read_done failure: shared "
2361 "buffer already assigned to stream");
2364 if(sldns_buffer_remaining(h2_session->c->buffer) <
2365 sldns_buffer_remaining(h2_stream->qbuffer)) {
2366 /* qbuffer will be free'd in frame close cb */
2367 sldns_buffer_clear(h2_session->c->buffer);
2368 verbose(VERB_ALGO, "http2_query_read_done failure: can't fit "
2369 "qbuffer in c->buffer");
2373 sldns_buffer_write(h2_session->c->buffer,
2374 sldns_buffer_current(h2_stream->qbuffer),
2375 sldns_buffer_remaining(h2_stream->qbuffer));
/* the query now lives in c->buffer: give the qbuffer capacity back to
 * http2_query_buffer_count and release the buffer */
2377 lock_basic_lock(&http2_query_buffer_count_lock);
2378 http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer);
2379 lock_basic_unlock(&http2_query_buffer_count_lock);
2380 sldns_buffer_free(h2_stream->qbuffer);
2381 h2_stream->qbuffer = NULL;
2383 sldns_buffer_flip(h2_session->c->buffer);
/* c->h2_stream names the stream that uses the shared buffer during the
 * callback; it is reset below when no answer is returned */
2384 h2_session->c->h2_stream = h2_stream;
2385 fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback));
2386 if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg,
2387 NETEVENT_NOERROR, &h2_session->c->repinfo)) {
2388 return 1; /* answer in c->buffer */
2390 sldns_buffer_clear(h2_session->c->buffer);
2391 h2_session->c->h2_stream = NULL;
2392 return 0; /* mesh state added, or dropped */
2395 /** nghttp2 callback. Used to check if the received frame indicates the end of a
2396 * stream. Gather collected request data and start query handling. */
2397 static int http2_req_frame_recv_cb(nghttp2_session* session,
2398 const nghttp2_frame* frame, void* cb_arg)
2400 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2401 struct http2_stream* h2_stream;
2402 int query_read_done;
/* only DATA or HEADERS frames that carry END_STREAM are of interest */
2404 if((frame->hd.type != NGHTTP2_DATA &&
2405 frame->hd.type != NGHTTP2_HEADERS) ||
2406 !(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) {
2410 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2411 session, frame->hd.stream_id)))
/* request validation: each failing check stores the HTTP status to
 * report on the stream and jumps to submit_http_error */
2414 if(h2_stream->invalid_endpoint) {
2415 h2_stream->status = HTTP_STATUS_NOT_FOUND;
2416 goto submit_http_error;
2419 if(h2_stream->invalid_content_type) {
2420 h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE;
2421 goto submit_http_error;
2424 if(h2_stream->http_method != HTTP_METHOD_GET &&
2425 h2_stream->http_method != HTTP_METHOD_POST) {
2426 h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED;
2427 goto submit_http_error;
/* an oversized query is reported as a too large payload for POST and
 * as a too long URI otherwise */
2430 if(h2_stream->query_too_large) {
2431 if(h2_stream->http_method == HTTP_METHOD_POST)
2432 h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE;
2434 h2_stream->status = HTTP_STATUS_URI_TOO_LONG;
2435 goto submit_http_error;
2438 if(!h2_stream->qbuffer) {
2439 h2_stream->status = HTTP_STATUS_BAD_REQUEST;
2440 goto submit_http_error;
2443 if(h2_stream->status) {
2445 verbose(VERB_QUERY, "http2 request invalid, returning :status="
2446 "%d", h2_stream->status);
2447 if(!http2_submit_error(h2_session, h2_stream)) {
2448 return NGHTTP2_ERR_CALLBACK_FAILURE;
2452 h2_stream->status = HTTP_STATUS_OK;
/* postpone_drop is set around the query handling; is_drop is checked
 * afterwards to see if the connection has to be closed */
2454 sldns_buffer_flip(h2_stream->qbuffer);
2455 h2_session->postpone_drop = 1;
2456 query_read_done = http2_query_read_done(h2_session, h2_stream);
2457 if(query_read_done < 0)
2458 return NGHTTP2_ERR_CALLBACK_FAILURE;
2459 else if(!query_read_done) {
2460 if(h2_session->is_drop) {
2461 /* connection needs to be closed. Return failure to make
2462 * sure no other action are taken anymore on comm point.
2463 * failure will result in reclaiming (and closing)
2465 verbose(VERB_QUERY, "http2 query dropped in worker cb");
2466 h2_session->postpone_drop = 0;
2467 return NGHTTP2_ERR_CALLBACK_FAILURE;
2469 /* nothing to submit right now, query added to mesh. */
2470 h2_session->postpone_drop = 0;
/* an answer is in c->buffer: submit it, then hand the shared buffer
 * back by clearing it and resetting c->h2_stream on both outcomes */
2473 if(!http2_submit_dns_response(h2_session)) {
2474 sldns_buffer_clear(h2_session->c->buffer);
2475 h2_session->c->h2_stream = NULL;
2476 return NGHTTP2_ERR_CALLBACK_FAILURE;
2478 verbose(VERB_QUERY, "http2 query submitted to session");
2479 sldns_buffer_clear(h2_session->c->buffer);
2480 h2_session->c->h2_stream = NULL;
2484 /** nghttp2 callback. Used to detect start of new streams.
 * For a HEADERS frame of the request category a new http2_stream is
 * created for the stream id of the frame, added to the session, and
 * stored as the stream user data in nghttp2. A failure to create the
 * stream or to store the user data returns NGHTTP2_ERR_CALLBACK_FAILURE. */
2485 static int http2_req_begin_headers_cb(nghttp2_session* session,
2486 const nghttp2_frame* frame, void* cb_arg)
2488 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2489 struct http2_stream* h2_stream;
2491 if(frame->hd.type != NGHTTP2_HEADERS ||
2492 frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2493 /* only interested in request headers */
2496 if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) {
2497 log_err("malloc failure while creating http2 stream");
2498 return NGHTTP2_ERR_CALLBACK_FAILURE;
2500 http2_session_add_stream(h2_session, h2_stream);
2501 ret = nghttp2_session_set_stream_user_data(session,
2502 frame->hd.stream_id, h2_stream);
2504 /* stream does not exist */
2505 verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2506 "error: %s", nghttp2_strerror(ret));
2507 return NGHTTP2_ERR_CALLBACK_FAILURE;
2514 * base64url decode, store in qbuffer
2515 * @param h2_session: http2 session
2516 * @param h2_stream: http2 stream
2517 * @param start: start of the base64 string
2518 * @param length: length of the base64 string
2519 * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer,
2520 * buffer will be NULL is unparseble.
2522 static int http2_buffer_uri_query(struct http2_session* h2_session,
2523 struct http2_stream* h2_stream, const uint8_t* start, size_t length)
2525 size_t expectb64len;
2527 if(h2_stream->http_method == HTTP_METHOD_POST)
2531 if(h2_stream->qbuffer) {
2532 verbose(VERB_ALGO, "http2_req_header fail, "
2533 "qbuffer already set");
2537 /* calculate size, might be a bit bigger than the real
2538 * decoded buffer size */
2539 expectb64len = sldns_b64_pton_calculate_size(length);
2540 log_assert(expectb64len > 0);
2542 h2_session->c->http2_stream_max_qbuffer_size) {
2543 h2_stream->query_too_large = 1;
2547 lock_basic_lock(&http2_query_buffer_count_lock);
2548 if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) {
2549 lock_basic_unlock(&http2_query_buffer_count_lock);
2550 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2551 "in http2-query-buffer-size");
2552 return http2_submit_rst_stream(h2_session, h2_stream);
2554 http2_query_buffer_count += expectb64len;
2555 lock_basic_unlock(&http2_query_buffer_count_lock);
2556 if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) {
2557 lock_basic_lock(&http2_query_buffer_count_lock);
2558 http2_query_buffer_count -= expectb64len;
2559 lock_basic_unlock(&http2_query_buffer_count_lock);
2560 log_err("http2_req_header fail, qbuffer "
2565 if(!(b64len = sldns_b64url_pton(
2566 (char const *)start, length,
2567 sldns_buffer_current(h2_stream->qbuffer),
2568 expectb64len)) || b64len < 0) {
2569 lock_basic_lock(&http2_query_buffer_count_lock);
2570 http2_query_buffer_count -= expectb64len;
2571 lock_basic_unlock(&http2_query_buffer_count_lock);
2572 sldns_buffer_free(h2_stream->qbuffer);
2573 h2_stream->qbuffer = NULL;
2574 /* return without error, method can be an
2578 sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len);
/**
 * nghttp2 callback. Used to parse headers from HEADERS frames.
 * Called per header name/value pair of a request. Records the :method,
 * checks :path against the configured endpoint and buffers a "?dns=" query
 * from it, checks content-type, and stores content-length. Problems with
 * the request are flagged on the stream (invalid_endpoint,
 * invalid_content_type, query_too_large, unsupported method) instead of
 * being returned as callback errors.
 * @param session: nghttp2 session, used to look up the stream user data.
 * @param frame: frame the header belongs to.
 * @param name: header name, of length namelen.
 * @param value: header value, of length valuelen.
 * @param cb_arg: the http2_session of this connection.
 * @return 0 to continue, NGHTTP2_ERR_CALLBACK_FAILURE if buffering the
 * 	query from the URI failed.
 */
static int http2_req_header_cb(nghttp2_session* session,
	const nghttp2_frame* frame, const uint8_t* name, size_t namelen,
	const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags),
	void* cb_arg)
{
	struct http2_stream* h2_stream = NULL;
	struct http2_session* h2_session = (struct http2_session*)cb_arg;
	/* nghttp2 deals with CONTINUATION frames and provides them as part of
	 * the HEADERS */
	if(frame->hd.type != NGHTTP2_HEADERS ||
		frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
		/* only interested in request headers */
		return 0;
	}
	if(!(h2_stream = nghttp2_session_get_stream_user_data(session,
		frame->hd.stream_id)))
		return 0;

	/* earlier checks already indicate we can stop handling this query */
	if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED ||
		h2_stream->invalid_content_type ||
		h2_stream->invalid_endpoint)
		return 0;


	/* nghttp2 performs some sanity checks in the headers, including:
	 * name and value are guaranteed to be null terminated
	 * name is guaranteed to be lowercase
	 * content-length value is guaranteed to contain digits
	 */

	if(!h2_stream->http_method && namelen == 7 &&
		memcmp(":method", name, namelen) == 0) {
		/* Case insensitive check on :method value to be on the safe
		 * side. I failed to find text about case sensitivity in specs.
		 */
		if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0)
			h2_stream->http_method = HTTP_METHOD_GET;
		else if(valuelen == 4 &&
			strcasecmp("POST", (const char*)value) == 0) {
			h2_stream->http_method = HTTP_METHOD_POST;
			if(h2_stream->qbuffer) {
				/* POST method uses query from DATA frames, so
				 * drop a query that was already buffered from
				 * the URI and release its accounted space */
				lock_basic_lock(&http2_query_buffer_count_lock);
				http2_query_buffer_count -=
					sldns_buffer_capacity(h2_stream->qbuffer);
				lock_basic_unlock(&http2_query_buffer_count_lock);
				sldns_buffer_free(h2_stream->qbuffer);
				h2_stream->qbuffer = NULL;
			}
		} else
			h2_stream->http_method = HTTP_METHOD_UNSUPPORTED;
		return 0;
	}
	if(namelen == 5 && memcmp(":path", name, namelen) == 0) {
		/* :path may contain DNS query, depending on method. Method might
		 * not be known yet here, so check after finishing receiving
		 * stream. */
#define HTTP_QUERY_PARAM "?dns="
		size_t el = strlen(h2_session->c->http_endpoint);
		size_t qpl = strlen(HTTP_QUERY_PARAM);

		/* the path has to start with the configured endpoint */
		if(valuelen < el || memcmp(h2_session->c->http_endpoint,
			value, el) != 0) {
			h2_stream->invalid_endpoint = 1;
			return 0;
		}
		/* larger than endpoint only allowed if it is for the query
		 * parameter */
		if(valuelen <= el+qpl ||
			memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) {
			if(valuelen != el)
				h2_stream->invalid_endpoint = 1;
			return 0;
		}

		/* everything after "?dns=" is the base64url encoded query */
		if(!http2_buffer_uri_query(h2_session, h2_stream,
			value+(el+qpl), valuelen-(el+qpl))) {
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		}
		return 0;
	}
	/* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST,
	 * and not needed when using GET. Don't enforce.
	 * If set only allow lowercase "application/dns-message".
	 *
	 * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST
	 * be able to handle "application/dns-message". Since that is the only
	 * content-type supported we can ignore the accept header.
	 */
	if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) {
		if(valuelen != 23 || memcmp("application/dns-message", value,
			valuelen) != 0) {
			h2_stream->invalid_content_type = 1;
		}
	}

	/* Only interested in content-length for POST (or not yet known) method.
	 */
	if((!h2_stream->http_method ||
		h2_stream->http_method == HTTP_METHOD_POST) &&
		!h2_stream->content_length && namelen == 14 &&
		memcmp("content-length", name, namelen) == 0) {
		/* more than 5 digits is too large for a query, and keeping
		 * the value short keeps the atoi below within range */
		if(valuelen > 5) {
			h2_stream->query_too_large = 1;
			return 0;
		}
		/* guaranteed to only contain digits and be null terminated */
		h2_stream->content_length = atoi((const char*)value);
		if(h2_stream->content_length >
			h2_session->c->http2_stream_max_qbuffer_size) {
			h2_stream->query_too_large = 1;
			return 0;
		}
	}
	return 0;
}

2701 /** nghttp2 callback. Used to get data from DATA frames, which can contain
2702 * queries in POST requests. */
2703 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session),
2704 uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data,
2705 size_t len, void* cb_arg)
2707 struct http2_session* h2_session = (struct http2_session*)cb_arg;
2708 struct http2_stream* h2_stream;
2711 if(!(h2_stream = nghttp2_session_get_stream_user_data(
2712 h2_session->session, stream_id))) {
2716 if(h2_stream->query_too_large)
2719 if(!h2_stream->qbuffer) {
2720 if(h2_stream->content_length) {
2721 if(h2_stream->content_length < len)
2722 /* getting more data in DATA frame than
2723 * advertised in content-length header. */
2724 return NGHTTP2_ERR_CALLBACK_FAILURE;
2725 qlen = h2_stream->content_length;
2726 } else if(len <= h2_session->c->http2_stream_max_qbuffer_size) {
2727 /* setting this to msg-buffer-size can result in a lot
2728 * of memory consuption. Most queries should fit in a
2729 * single DATA frame, and most POST queries will
2730 * containt content-length which does not impose this
2735 if(!h2_stream->qbuffer && qlen) {
2736 lock_basic_lock(&http2_query_buffer_count_lock);
2737 if(http2_query_buffer_count + qlen > http2_query_buffer_max) {
2738 lock_basic_unlock(&http2_query_buffer_count_lock);
2739 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2740 "in http2-query-buffer-size");
2741 return http2_submit_rst_stream(h2_session, h2_stream);
2743 http2_query_buffer_count += qlen;
2744 lock_basic_unlock(&http2_query_buffer_count_lock);
2745 if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) {
2746 lock_basic_lock(&http2_query_buffer_count_lock);
2747 http2_query_buffer_count -= qlen;
2748 lock_basic_unlock(&http2_query_buffer_count_lock);
2752 if(!h2_stream->qbuffer ||
2753 sldns_buffer_remaining(h2_stream->qbuffer) < len) {
2754 verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough "
2755 "buffer space for POST query. Can happen on multi "
2756 "frame requests without content-length header");
2757 h2_stream->query_too_large = 1;
2761 sldns_buffer_write(h2_stream->qbuffer, data, len);
2766 void http2_req_stream_clear(struct http2_stream* h2_stream)
2768 if(h2_stream->qbuffer) {
2769 lock_basic_lock(&http2_query_buffer_count_lock);
2770 http2_query_buffer_count -=
2771 sldns_buffer_capacity(h2_stream->qbuffer);
2772 lock_basic_unlock(&http2_query_buffer_count_lock);
2773 sldns_buffer_free(h2_stream->qbuffer);
2774 h2_stream->qbuffer = NULL;
2776 if(h2_stream->rbuffer) {
2777 lock_basic_lock(&http2_response_buffer_count_lock);
2778 http2_response_buffer_count -=
2779 sldns_buffer_capacity(h2_stream->rbuffer);
2780 lock_basic_unlock(&http2_response_buffer_count_lock);
2781 sldns_buffer_free(h2_stream->rbuffer);
2782 h2_stream->rbuffer = NULL;
2786 nghttp2_session_callbacks* http2_req_callbacks_create()
2788 nghttp2_session_callbacks *callbacks;
2789 if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) {
2790 log_err("failed to initialize nghttp2 callback");
2793 /* reception of header block started, used to create h2_stream */
2794 nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks,
2795 http2_req_begin_headers_cb);
2796 /* complete frame received, used to get data from stream if frame
2797 * has end stream flag, and start processing query */
2798 nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks,
2799 http2_req_frame_recv_cb);
2800 /* get request info from headers */
2801 nghttp2_session_callbacks_set_on_header_callback(callbacks,
2802 http2_req_header_cb);
2803 /* get data from DATA frames, containing POST query */
2804 nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks,
2805 http2_req_data_chunk_recv_cb);
2807 /* generic HTTP2 callbacks */
2808 nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb);
2809 nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb);
2810 nghttp2_session_callbacks_set_on_stream_close_callback(callbacks,
2811 http2_stream_close_cb);
2815 #endif /* HAVE_NGHTTP2 */