]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/services/listen_dnsport.c
dma: import snapshot 2021-07-10
[FreeBSD/FreeBSD.git] / contrib / unbound / services / listen_dnsport.c
1 /*
2  * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file has functions to get queries from clients.
40  */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 #  include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #include <limits.h>
47 #ifdef USE_TCP_FASTOPEN
48 #include <netinet/tcp.h>
49 #endif
50 #include "services/listen_dnsport.h"
51 #include "services/outside_network.h"
52 #include "util/netevent.h"
53 #include "util/log.h"
54 #include "util/config_file.h"
55 #include "util/net_help.h"
56 #include "sldns/sbuffer.h"
57 #include "sldns/parseutil.h"
58 #include "services/mesh.h"
59 #include "util/fptr_wlist.h"
60 #include "util/locks.h"
61
62 #ifdef HAVE_NETDB_H
63 #include <netdb.h>
64 #endif
65 #include <fcntl.h>
66
67 #ifdef HAVE_SYS_UN_H
68 #include <sys/un.h>
69 #endif
70
71 #ifdef HAVE_SYSTEMD
72 #include <systemd/sd-daemon.h>
73 #endif
74
75 #ifdef HAVE_IFADDRS_H
76 #include <ifaddrs.h>
77 #endif
78 #ifdef HAVE_NET_IF_H
79 #include <net/if.h>
80 #endif
81
82 /** number of queued TCP connections for listen() */
83 #define TCP_BACKLOG 256 
84
85 #ifndef THREADS_DISABLED
86 /** lock on the counter of stream buffer memory */
87 static lock_basic_type stream_wait_count_lock;
88 /** lock on the counter of HTTP2 query buffer memory */
89 static lock_basic_type http2_query_buffer_count_lock;
90 /** lock on the counter of HTTP2 response buffer memory */
91 static lock_basic_type http2_response_buffer_count_lock;
92 #endif
93 /** size (in bytes) of stream wait buffers */
94 static size_t stream_wait_count = 0;
95 /** is the lock initialised for stream wait buffers */
96 static int stream_wait_lock_inited = 0;
97 /** size (in bytes) of HTTP2 query buffers */
98 static size_t http2_query_buffer_count = 0;
99 /** is the lock initialised for HTTP2 query buffers */
100 static int http2_query_buffer_lock_inited = 0;
101 /** size (in bytes) of HTTP2 response buffers */
102 static size_t http2_response_buffer_count = 0;
103 /** is the lock initialised for HTTP2 response buffers */
104 static int http2_response_buffer_lock_inited = 0;
105
106 /**
107  * Debug print of the getaddrinfo returned address.
108  * @param addr: the address returned.
109  */
110 static void
111 verbose_print_addr(struct addrinfo *addr)
112 {
113         if(verbosity >= VERB_ALGO) {
114                 char buf[100];
115                 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
116 #ifdef INET6
117                 if(addr->ai_family == AF_INET6)
118                         sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
119                                 sin6_addr;
120 #endif /* INET6 */
121                 if(inet_ntop(addr->ai_family, sinaddr, buf,
122                         (socklen_t)sizeof(buf)) == 0) {
123                         (void)strlcpy(buf, "(null)", sizeof(buf));
124                 }
125                 buf[sizeof(buf)-1] = 0;
126                 verbose(VERB_ALGO, "creating %s%s socket %s %d", 
127                         addr->ai_socktype==SOCK_DGRAM?"udp":
128                         addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
129                         addr->ai_family==AF_INET?"4":
130                         addr->ai_family==AF_INET6?"6":
131                         "_otherfam", buf, 
132                         ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
133         }
134 }
135
136 void
137 verbose_print_unbound_socket(struct unbound_socket* ub_sock)
138 {
139         if(verbosity >= VERB_ALGO) {
140                 log_info("listing of unbound_socket structure:");
141                 verbose_print_addr(ub_sock->addr);
142                 log_info("s is: %d, fam is: %s", ub_sock->s, ub_sock->fam == AF_INET?"AF_INET":"AF_INET6");
143         }
144 }
145
#ifdef HAVE_SYSTEMD
/**
 * Find a socket that was passed in by systemd socket activation.
 * Checks that systemd is running and that LISTEN_PID and LISTEN_FDS are
 * set in the environment, then returns the first passed descriptor that
 * sd_is_socket() accepts for the given family, type and listen state.
 * @param family: address family handed to sd_is_socket().
 * @param socktype: socket type handed to sd_is_socket().
 * @param listen: listening-state argument handed to sd_is_socket(); the
 *	callers in this file pass 1 for TCP accept sockets and -1 for UDP.
 * @param addr: only used in the error message when no descriptor matches;
 *	may be NULL, in which case path is printed instead.
 * @param addrlen: length of addr.
 * @param path: only used in the error message when addr is NULL; may be NULL.
 * @return the matching file descriptor, or -1 when systemd is not running,
 *	a required environment variable is missing, no descriptors were
 *	passed, or none of them matches.
 */
static int
systemd_get_activated(int family, int socktype, int listen,
		      struct sockaddr *addr, socklen_t addrlen,
		      const char *path)
{
	int i = 0;
	int r = 0;
	int s = -1;
	const char* listen_pid, *listen_fds;

	/* We should use "listen" option only for stream protocols. For UDP it should be -1 */

	if((r = sd_booted()) < 1) {
		if(r == 0)
			log_warn("systemd is not running");
		else
			log_err("systemd sd_booted(): %s", strerror(-r));
		return -1;
	}

	listen_pid = getenv("LISTEN_PID");
	listen_fds = getenv("LISTEN_FDS");

	if (!listen_pid) {
		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID");
		return -1;
	}

	if (!listen_fds) {
		log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS");
		return -1;
	}

	if((r = sd_listen_fds(0)) < 1) {
		if(r == 0)
			log_warn("systemd: did not return socket, check unit configuration");
		else
			log_err("systemd sd_listen_fds(): %s", strerror(-r));
		return -1;
	}

	for(i = 0; i < r; i++) {
		/* sd_is_socket() reports failure as a negative errno value;
		 * only a positive result means the descriptor matches, so a
		 * plain truth test would accept a descriptor that failed the
		 * check */
		if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype,
			listen) > 0) {
			s = SD_LISTEN_FDS_START + i;
			break;
		}
	}
	if (s == -1) {
		if (addr)
			log_err_addr("systemd sd_listen_fds()",
				     "no such socket",
				     (struct sockaddr_storage *)addr, addrlen);
		else
			/* path may be NULL too; never hand NULL to %s */
			log_err("systemd sd_listen_fds(): %s",
				path ? path : "(null)");
	}
	return s;
}
#endif
205
206 int
207 create_udp_sock(int family, int socktype, struct sockaddr* addr,
208         socklen_t addrlen, int v6only, int* inuse, int* noproto,
209         int rcv, int snd, int listen, int* reuseport, int transparent,
210         int freebind, int use_systemd, int dscp)
211 {
212         int s;
213         char* err;
214 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)  || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY)
215         int on=1;
216 #endif
217 #ifdef IPV6_MTU
218         int mtu = IPV6_MIN_MTU;
219 #endif
220 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
221         (void)rcv;
222 #endif
223 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
224         (void)snd;
225 #endif
226 #ifndef IPV6_V6ONLY
227         (void)v6only;
228 #endif
229 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
230         (void)transparent;
231 #endif
232 #if !defined(IP_FREEBIND)
233         (void)freebind;
234 #endif
235 #ifdef HAVE_SYSTEMD
236         int got_fd_from_systemd = 0;
237
238         if (!use_systemd
239             || (use_systemd
240                 && (s = systemd_get_activated(family, socktype, -1, addr,
241                                               addrlen, NULL)) == -1)) {
242 #else
243         (void)use_systemd;
244 #endif
245         if((s = socket(family, socktype, 0)) == -1) {
246                 *inuse = 0;
247 #ifndef USE_WINSOCK
248                 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
249                         *noproto = 1;
250                         return -1;
251                 }
252 #else
253                 if(WSAGetLastError() == WSAEAFNOSUPPORT || 
254                         WSAGetLastError() == WSAEPROTONOSUPPORT) {
255                         *noproto = 1;
256                         return -1;
257                 }
258 #endif
259                 log_err("can't create socket: %s", sock_strerror(errno));
260                 *noproto = 0;
261                 return -1;
262         }
263 #ifdef HAVE_SYSTEMD
264         } else {
265                 got_fd_from_systemd = 1;
266         }
267 #endif
268         if(listen) {
269 #ifdef SO_REUSEADDR
270                 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 
271                         (socklen_t)sizeof(on)) < 0) {
272                         log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
273                                 sock_strerror(errno));
274 #ifndef USE_WINSOCK
275                         if(errno != ENOSYS) {
276                                 close(s);
277                                 *noproto = 0;
278                                 *inuse = 0;
279                                 return -1;
280                         }
281 #else
282                         closesocket(s);
283                         *noproto = 0;
284                         *inuse = 0;
285                         return -1;
286 #endif
287                 }
288 #endif /* SO_REUSEADDR */
289 #ifdef SO_REUSEPORT
290 #  ifdef SO_REUSEPORT_LB
291                 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance
292                  * like SO_REUSEPORT on Linux.  This is what the users want
293                  * with the config option in unbound.conf; if we actually
294                  * need local address and port reuse they'll also need to
295                  * have SO_REUSEPORT set for them, assume it was _LB they want.
296                  */
297                 if (reuseport && *reuseport &&
298                     setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on,
299                         (socklen_t)sizeof(on)) < 0) {
300 #ifdef ENOPROTOOPT
301                         if(errno != ENOPROTOOPT || verbosity >= 3)
302                                 log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s",
303                                         strerror(errno));
304 #endif
305                         /* this option is not essential, we can continue */
306                         *reuseport = 0;
307                 }
308 #  else /* no SO_REUSEPORT_LB */
309
310                 /* try to set SO_REUSEPORT so that incoming
311                  * queries are distributed evenly among the receiving threads.
312                  * Each thread must have its own socket bound to the same port,
313                  * with SO_REUSEPORT set on each socket.
314                  */
315                 if (reuseport && *reuseport &&
316                     setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
317                         (socklen_t)sizeof(on)) < 0) {
318 #ifdef ENOPROTOOPT
319                         if(errno != ENOPROTOOPT || verbosity >= 3)
320                                 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
321                                         strerror(errno));
322 #endif
323                         /* this option is not essential, we can continue */
324                         *reuseport = 0;
325                 }
326 #  endif /* SO_REUSEPORT_LB */
327 #else
328                 (void)reuseport;
329 #endif /* defined(SO_REUSEPORT) */
330 #ifdef IP_TRANSPARENT
331                 if (transparent &&
332                     setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
333                     (socklen_t)sizeof(on)) < 0) {
334                         log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
335                         strerror(errno));
336                 }
337 #elif defined(IP_BINDANY)
338                 if (transparent &&
339                     setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
340                     (family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
341                     (void*)&on, (socklen_t)sizeof(on)) < 0) {
342                         log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
343                         (family==AF_INET6?"V6":""), strerror(errno));
344                 }
345 #elif defined(SO_BINDANY)
346                 if (transparent &&
347                     setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on,
348                     (socklen_t)sizeof(on)) < 0) {
349                         log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
350                         strerror(errno));
351                 }
352 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
353         }
354 #ifdef IP_FREEBIND
355         if(freebind &&
356             setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
357             (socklen_t)sizeof(on)) < 0) {
358                 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
359                 strerror(errno));
360         }
361 #endif /* IP_FREEBIND */
362         if(rcv) {
363 #ifdef SO_RCVBUF
364                 int got;
365                 socklen_t slen = (socklen_t)sizeof(got);
366 #  ifdef SO_RCVBUFFORCE
367                 /* Linux specific: try to use root permission to override
368                  * system limits on rcvbuf. The limit is stored in 
369                  * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
370                 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, 
371                         (socklen_t)sizeof(rcv)) < 0) {
372                         if(errno != EPERM) {
373                                 log_err("setsockopt(..., SO_RCVBUFFORCE, "
374                                         "...) failed: %s", sock_strerror(errno));
375                                 sock_close(s);
376                                 *noproto = 0;
377                                 *inuse = 0;
378                                 return -1;
379                         }
380 #  endif /* SO_RCVBUFFORCE */
381                         if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, 
382                                 (socklen_t)sizeof(rcv)) < 0) {
383                                 log_err("setsockopt(..., SO_RCVBUF, "
384                                         "...) failed: %s", sock_strerror(errno));
385                                 sock_close(s);
386                                 *noproto = 0;
387                                 *inuse = 0;
388                                 return -1;
389                         }
390                         /* check if we got the right thing or if system
391                          * reduced to some system max.  Warn if so */
392                         if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, 
393                                 &slen) >= 0 && got < rcv/2) {
394                                 log_warn("so-rcvbuf %u was not granted. "
395                                         "Got %u. To fix: start with "
396                                         "root permissions(linux) or sysctl "
397                                         "bigger net.core.rmem_max(linux) or "
398                                         "kern.ipc.maxsockbuf(bsd) values.",
399                                         (unsigned)rcv, (unsigned)got);
400                         }
401 #  ifdef SO_RCVBUFFORCE
402                 }
403 #  endif
404 #endif /* SO_RCVBUF */
405         }
406         /* first do RCVBUF as the receive buffer is more important */
407         if(snd) {
408 #ifdef SO_SNDBUF
409                 int got;
410                 socklen_t slen = (socklen_t)sizeof(got);
411 #  ifdef SO_SNDBUFFORCE
412                 /* Linux specific: try to use root permission to override
413                  * system limits on sndbuf. The limit is stored in 
414                  * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
415                 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, 
416                         (socklen_t)sizeof(snd)) < 0) {
417                         if(errno != EPERM) {
418                                 log_err("setsockopt(..., SO_SNDBUFFORCE, "
419                                         "...) failed: %s", sock_strerror(errno));
420                                 sock_close(s);
421                                 *noproto = 0;
422                                 *inuse = 0;
423                                 return -1;
424                         }
425 #  endif /* SO_SNDBUFFORCE */
426                         if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 
427                                 (socklen_t)sizeof(snd)) < 0) {
428                                 log_err("setsockopt(..., SO_SNDBUF, "
429                                         "...) failed: %s", sock_strerror(errno));
430                                 sock_close(s);
431                                 *noproto = 0;
432                                 *inuse = 0;
433                                 return -1;
434                         }
435                         /* check if we got the right thing or if system
436                          * reduced to some system max.  Warn if so */
437                         if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, 
438                                 &slen) >= 0 && got < snd/2) {
439                                 log_warn("so-sndbuf %u was not granted. "
440                                         "Got %u. To fix: start with "
441                                         "root permissions(linux) or sysctl "
442                                         "bigger net.core.wmem_max(linux) or "
443                                         "kern.ipc.maxsockbuf(bsd) values.",
444                                         (unsigned)snd, (unsigned)got);
445                         }
446 #  ifdef SO_SNDBUFFORCE
447                 }
448 #  endif
449 #endif /* SO_SNDBUF */
450         }
451         err = set_ip_dscp(s, family, dscp);
452         if(err != NULL)
453                 log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err);
454         if(family == AF_INET6) {
455 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
456                 int omit6_set = 0;
457                 int action;
458 # endif
459 # if defined(IPV6_V6ONLY)
460                 if(v6only) {
461                         int val=(v6only==2)?0:1;
462                         if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 
463                                 (void*)&val, (socklen_t)sizeof(val)) < 0) {
464                                 log_err("setsockopt(..., IPV6_V6ONLY"
465                                         ", ...) failed: %s", sock_strerror(errno));
466                                 sock_close(s);
467                                 *noproto = 0;
468                                 *inuse = 0;
469                                 return -1;
470                         }
471                 }
472 # endif
473 # if defined(IPV6_USE_MIN_MTU)
474                 /*
475                  * There is no fragmentation of IPv6 datagrams
476                  * during forwarding in the network. Therefore
477                  * we do not send UDP datagrams larger than
478                  * the minimum IPv6 MTU of 1280 octets. The
479                  * EDNS0 message length can be larger if the
480                  * network stack supports IPV6_USE_MIN_MTU.
481                  */
482                 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
483                         (void*)&on, (socklen_t)sizeof(on)) < 0) {
484                         log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
485                                 "...) failed: %s", sock_strerror(errno));
486                         sock_close(s);
487                         *noproto = 0;
488                         *inuse = 0;
489                         return -1;
490                 }
491 # elif defined(IPV6_MTU)
492                 /*
493                  * On Linux, to send no larger than 1280, the PMTUD is
494                  * disabled by default for datagrams anyway, so we set
495                  * the MTU to use.
496                  */
497                 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
498                         (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
499                         log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 
500                                 sock_strerror(errno));
501                         sock_close(s);
502                         *noproto = 0;
503                         *inuse = 0;
504                         return -1;
505                 }
506 # endif /* IPv6 MTU */
507 # if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
508 #  if defined(IP_PMTUDISC_OMIT)
509                 action = IP_PMTUDISC_OMIT;
510                 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
511                         &action, (socklen_t)sizeof(action)) < 0) {
512
513                         if (errno != EINVAL) {
514                                 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
515                                         strerror(errno));
516                                 sock_close(s);
517                                 *noproto = 0;
518                                 *inuse = 0;
519                                 return -1;
520                         }
521                 }
522                 else
523                 {
524                     omit6_set = 1;
525                 }
526 #  endif
527                 if (omit6_set == 0) {
528                         action = IP_PMTUDISC_DONT;
529                         if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
530                                 &action, (socklen_t)sizeof(action)) < 0) {
531                                 log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
532                                         strerror(errno));
533                                 sock_close(s);
534                                 *noproto = 0;
535                                 *inuse = 0;
536                                 return -1;
537                         }
538                 }
539 # endif /* IPV6_MTU_DISCOVER */
540         } else if(family == AF_INET) {
541 #  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
542 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
543  * PMTU information is not accepted, but fragmentation is allowed
544  * if and only if the packet size exceeds the outgoing interface MTU
545  * (and also uses the interface mtu to determine the size of the packets).
546  * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
547  * FreeBSD already has same semantics without setting the option. */
548                 int omit_set = 0;
549                 int action;
550 #   if defined(IP_PMTUDISC_OMIT)
551                 action = IP_PMTUDISC_OMIT;
552                 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 
553                         &action, (socklen_t)sizeof(action)) < 0) {
554
555                         if (errno != EINVAL) {
556                                 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
557                                         strerror(errno));
558                                 sock_close(s);
559                                 *noproto = 0;
560                                 *inuse = 0;
561                                 return -1;
562                         }
563                 }
564                 else
565                 {
566                     omit_set = 1;
567                 }
568 #   endif
569                 if (omit_set == 0) {
570                         action = IP_PMTUDISC_DONT;
571                         if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
572                                 &action, (socklen_t)sizeof(action)) < 0) {
573                                 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
574                                         strerror(errno));
575                                 sock_close(s);
576                                 *noproto = 0;
577                                 *inuse = 0;
578                                 return -1;
579                         }
580                 }
581 #  elif defined(IP_DONTFRAG) && !defined(__APPLE__)
582                 /* the IP_DONTFRAG option if defined in the 11.0 OSX headers,
583                  * but does not work on that version, so we exclude it */
584                 int off = 0;
585                 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, 
586                         &off, (socklen_t)sizeof(off)) < 0) {
587                         log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
588                                 strerror(errno));
589                         sock_close(s);
590                         *noproto = 0;
591                         *inuse = 0;
592                         return -1;
593                 }
594 #  endif /* IPv4 MTU */
595         }
596         if(
597 #ifdef HAVE_SYSTEMD
598                 !got_fd_from_systemd &&
599 #endif
600                 bind(s, (struct sockaddr*)addr, addrlen) != 0) {
601                 *noproto = 0;
602                 *inuse = 0;
603 #ifndef USE_WINSOCK
604 #ifdef EADDRINUSE
605                 *inuse = (errno == EADDRINUSE);
606                 /* detect freebsd jail with no ipv6 permission */
607                 if(family==AF_INET6 && errno==EINVAL)
608                         *noproto = 1;
609                 else if(errno != EADDRINUSE &&
610                         !(errno == EACCES && verbosity < 4 && !listen)
611 #ifdef EADDRNOTAVAIL
612                         && !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen)
613 #endif
614                         ) {
615                         log_err_addr("can't bind socket", strerror(errno),
616                                 (struct sockaddr_storage*)addr, addrlen);
617                 }
618 #endif /* EADDRINUSE */
619 #else /* USE_WINSOCK */
620                 if(WSAGetLastError() != WSAEADDRINUSE &&
621                         WSAGetLastError() != WSAEADDRNOTAVAIL &&
622                         !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) {
623                         log_err_addr("can't bind socket", 
624                                 wsa_strerror(WSAGetLastError()),
625                                 (struct sockaddr_storage*)addr, addrlen);
626                 }
627 #endif /* USE_WINSOCK */
628                 sock_close(s);
629                 return -1;
630         }
631         if(!fd_set_nonblock(s)) {
632                 *noproto = 0;
633                 *inuse = 0;
634                 sock_close(s);
635                 return -1;
636         }
637         return s;
638 }
639
640 int
641 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
642         int* reuseport, int transparent, int mss, int nodelay, int freebind,
643         int use_systemd, int dscp)
644 {
645         int s;
646         char* err;
647 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY)
648         int on = 1;
649 #endif
650 #ifdef HAVE_SYSTEMD
651         int got_fd_from_systemd = 0;
652 #endif
653 #ifdef USE_TCP_FASTOPEN
654         int qlen;
655 #endif
656 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
657         (void)transparent;
658 #endif
659 #if !defined(IP_FREEBIND)
660         (void)freebind;
661 #endif
662         verbose_print_addr(addr);
663         *noproto = 0;
664 #ifdef HAVE_SYSTEMD
665         if (!use_systemd ||
666             (use_systemd
667              && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
668                                            addr->ai_addr, addr->ai_addrlen,
669                                            NULL)) == -1)) {
670 #else
671         (void)use_systemd;
672 #endif
673         if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
674 #ifndef USE_WINSOCK
675                 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
676                         *noproto = 1;
677                         return -1;
678                 }
679 #else
680                 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
681                         WSAGetLastError() == WSAEPROTONOSUPPORT) {
682                         *noproto = 1;
683                         return -1;
684                 }
685 #endif
686                 log_err("can't create socket: %s", sock_strerror(errno));
687                 return -1;
688         }
689         if(nodelay) {
690 #if defined(IPPROTO_TCP) && defined(TCP_NODELAY)
691                 if(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void*)&on,
692                         (socklen_t)sizeof(on)) < 0) {
693                         #ifndef USE_WINSOCK
694                         log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
695                                 strerror(errno));
696                         #else
697                         log_err(" setsockopt(.. TCP_NODELAY ..) failed: %s",
698                                 wsa_strerror(WSAGetLastError()));
699                         #endif
700                 }
701 #else
702                 log_warn(" setsockopt(TCP_NODELAY) unsupported");
703 #endif /* defined(IPPROTO_TCP) && defined(TCP_NODELAY) */
704         }
705         if (mss > 0) {
706 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
707                 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
708                         (socklen_t)sizeof(mss)) < 0) {
709                         log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
710                                 sock_strerror(errno));
711                 } else {
712                         verbose(VERB_ALGO,
713                                 " tcp socket mss set to %d", mss);
714                 }
715 #else
716                 log_warn(" setsockopt(TCP_MAXSEG) unsupported");
717 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
718         }
719 #ifdef HAVE_SYSTEMD
720         } else {
721                 got_fd_from_systemd = 1;
722     }
723 #endif
724 #ifdef SO_REUSEADDR
725         if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 
726                 (socklen_t)sizeof(on)) < 0) {
727                 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
728                         sock_strerror(errno));
729                 sock_close(s);
730                 return -1;
731         }
732 #endif /* SO_REUSEADDR */
733 #ifdef IP_FREEBIND
734         if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
735             (socklen_t)sizeof(on)) < 0) {
736                 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
737                 strerror(errno));
738         }
739 #endif /* IP_FREEBIND */
740 #ifdef SO_REUSEPORT
741         /* try to set SO_REUSEPORT so that incoming
742          * connections are distributed evenly among the receiving threads.
743          * Each thread must have its own socket bound to the same port,
744          * with SO_REUSEPORT set on each socket.
745          */
746         if (reuseport && *reuseport &&
747                 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
748                 (socklen_t)sizeof(on)) < 0) {
749 #ifdef ENOPROTOOPT
750                 if(errno != ENOPROTOOPT || verbosity >= 3)
751                         log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
752                                 strerror(errno));
753 #endif
754                 /* this option is not essential, we can continue */
755                 *reuseport = 0;
756         }
757 #else
758         (void)reuseport;
759 #endif /* defined(SO_REUSEPORT) */
760 #if defined(IPV6_V6ONLY)
761         if(addr->ai_family == AF_INET6 && v6only) {
762                 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 
763                         (void*)&on, (socklen_t)sizeof(on)) < 0) {
764                         log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
765                                 sock_strerror(errno));
766                         sock_close(s);
767                         return -1;
768                 }
769         }
770 #else
771         (void)v6only;
772 #endif /* IPV6_V6ONLY */
773 #ifdef IP_TRANSPARENT
774         if (transparent &&
775             setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
776             (socklen_t)sizeof(on)) < 0) {
777                 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
778                         strerror(errno));
779         }
780 #elif defined(IP_BINDANY)
781         if (transparent &&
782             setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
783             (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
784             (void*)&on, (socklen_t)sizeof(on)) < 0) {
785                 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
786                 (addr->ai_family==AF_INET6?"V6":""), strerror(errno));
787         }
788 #elif defined(SO_BINDANY)
789         if (transparent &&
790             setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
791             sizeof(on)) < 0) {
792                 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
793                 strerror(errno));
794         }
795 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
796         err = set_ip_dscp(s, addr->ai_family, dscp);
797         if(err != NULL)
798                 log_warn("error setting IP DiffServ codepoint %d on TCP socket: %s", dscp, err);
799         if(
800 #ifdef HAVE_SYSTEMD
801                 !got_fd_from_systemd &&
802 #endif
803         bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
804 #ifndef USE_WINSOCK
805                 /* detect freebsd jail with no ipv6 permission */
806                 if(addr->ai_family==AF_INET6 && errno==EINVAL)
807                         *noproto = 1;
808                 else {
809                         log_err_addr("can't bind socket", strerror(errno),
810                                 (struct sockaddr_storage*)addr->ai_addr,
811                                 addr->ai_addrlen);
812                 }
813 #else
814                 log_err_addr("can't bind socket", 
815                         wsa_strerror(WSAGetLastError()),
816                         (struct sockaddr_storage*)addr->ai_addr,
817                         addr->ai_addrlen);
818 #endif
819                 sock_close(s);
820                 return -1;
821         }
822         if(!fd_set_nonblock(s)) {
823                 sock_close(s);
824                 return -1;
825         }
826         if(listen(s, TCP_BACKLOG) == -1) {
827                 log_err("can't listen: %s", sock_strerror(errno));
828                 sock_close(s);
829                 return -1;
830         }
831 #ifdef USE_TCP_FASTOPEN
832         /* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
833            against IP spoofing attacks as suggested in RFC7413 */
834 #ifdef __APPLE__
835         /* OS X implementation only supports qlen of 1 via this call. Actual
836            value is configured by the net.inet.tcp.fastopen_backlog kernel parm. */
837         qlen = 1;
838 #else
839         /* 5 is recommended on linux */
840         qlen = 5;
841 #endif
842         if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, 
843                   sizeof(qlen))) == -1 ) {
844 #ifdef ENOPROTOOPT
845                 /* squelch ENOPROTOOPT: freebsd server mode with kernel support
846                    disabled, except when verbosity enabled for debugging */
847                 if(errno != ENOPROTOOPT || verbosity >= 3) {
848 #endif
849                   if(errno == EPERM) {
850                         log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
851                   } else {
852                         log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
853                   }
854 #ifdef ENOPROTOOPT
855                 }
856 #endif
857         }
858 #endif
859         return s;
860 }
861
862 char*
863 set_ip_dscp(int socket, int addrfamily, int dscp)
864 {
865         int ds;
866
867         if(dscp == 0)
868                 return NULL;
869         ds = dscp << 2;
870         switch(addrfamily) {
871         case AF_INET6:
872                 if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds, sizeof(ds)) < 0)
873                         return sock_strerror(errno);
874                 break;
875         default:
876                 if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0)
877                         return sock_strerror(errno);
878                 break;
879         }
880         return NULL;
881 }
882
/**
 * Create a listening stream socket on a local (unix domain) path.
 * With systemd support compiled in and use_systemd set, a socket that
 * systemd activated for this path is returned instead, if there is one.
 * Otherwise an existing file at path is unlinked, and a new nonblocking
 * socket is bound to it and put into listening state.
 * @param path: filesystem path of the socket. It must fit in sun_path.
 * @param noproto: set to 1 when local sockets are not compiled in.
 * @param use_systemd: if true, try to get the socket from systemd first.
 * @return the fd, or -1 on failure (the error is logged).
 */
int
create_local_accept_sock(const char *path, int* noproto, int use_systemd)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;
#endif
#ifdef HAVE_SYSTEMD
	int ret;

	if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM, 1, NULL, 0, path)) != -1)
		return ret;
#else
	(void)use_systemd;
#endif

#ifdef HAVE_SYS_UN_H
	verbose(VERB_ALGO, "creating unix socket %s", path);
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (unsigned)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	if (strlcpy(usock.sun_path, path, sizeof(usock.sun_path)) >=
		sizeof(usock.sun_path)) {
		/* a truncated name would make bind() use another path than
		 * the one that is unlinked below, so refuse it */
		log_err("Cannot create local socket %s (path too long)",
			path);
		return -1;
	}

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
	sock_close(s);
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
957
958
/**
 * Create socket from getaddrinfo results.
 * Looks up ifname and port with the given hints and opens a UDP socket
 * (stype SOCK_DGRAM) or a TCP accept socket (any other stype) for the
 * first result.
 * Once the lookup succeeded, the addrinfo list, the fd (also when it is -1)
 * and the hinted address family are stored in ub_sock. The list is not
 * freed here.
 * @return the fd or -1 on failure; *noip6 is set when the failure is due
 *	to IPv6 not being available.
 */
static int
make_sock(int stype, const char* ifname, const char* port, 
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock)
{
	struct addrinfo* ai = NULL;
	int gai_rc, fd, in_use, no_proto;

	hints->ai_socktype = stype;
	*noip6 = 0;

	gai_rc = getaddrinfo(ifname, port, hints, &ai);
	if(gai_rc != 0 || ai == NULL) {
		const char* sys_detail = "";
#ifdef USE_WINSOCK
		if(gai_rc == EAI_NONAME && hints->ai_family == AF_INET6) {
			/* 'Host not found' for IP6 on winXP */
			*noip6 = 1;
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		if(gai_rc == EAI_SYSTEM)
			sys_detail = strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(gai_rc),
			sys_detail);
		return -1;
	}

	if(stype != SOCK_DGRAM) {
		fd = create_tcp_accept_sock(ai, v6only, &no_proto, reuseport,
			transparent, tcp_mss, nodelay, freebind, use_systemd,
			dscp);
		if(fd == -1 && no_proto && hints->ai_family == AF_INET6)
			*noip6 = 1;
	} else {
		verbose_print_addr(ai);
		fd = create_udp_sock(ai->ai_family, ai->ai_socktype,
			(struct sockaddr*)ai->ai_addr, ai->ai_addrlen,
			v6only, &in_use, &no_proto, (int)rcv, (int)snd, 1,
			reuseport, transparent, freebind, use_systemd, dscp);
		if(fd == -1) {
			if(in_use)
				log_err("bind: address already in use");
			else if(no_proto && hints->ai_family == AF_INET6)
				*noip6 = 1;
		}
	}

	/* filled in on failure too */
	ub_sock->addr = ai;
	ub_sock->s = fd;
	ub_sock->fam = hints->ai_family;

	return fd;
}
1015
/**
 * Make socket, and first see if ifname contains port override info.
 * An ifname of the form ifspec@port is split at the first '@'; the part
 * after it replaces the port argument. Without '@' the arguments are
 * passed to make_sock unchanged.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port, 
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
	int use_systemd, int dscp, struct unbound_socket* ub_sock)
{
	char newif[128];
	char p[16];
	size_t iflen;
	const char* at = strchr(ifname, '@');

	if(at == NULL) {
		/* no override present */
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport, transparent, tcp_mss, nodelay,
			freebind, use_systemd, dscp, ub_sock);
	}

	/* override port with ifspec@port */
	iflen = (size_t)(at - ifname);
	if(iflen >= sizeof(newif)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	if(strlen(at+1) >= sizeof(p)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	/* copy, then cut the interface part off at the '@' */
	(void)strlcpy(newif, ifname, sizeof(newif));
	newif[iflen] = 0;
	/* the port part was checked to fit, so it is copied whole */
	(void)strlcpy(p, at+1, sizeof(p));
	return make_sock(stype, newif, p, hints, v6only, noip6, rcv, snd,
		reuseport, transparent, tcp_mss, nodelay, freebind,
		use_systemd, dscp, ub_sock);
}
1050
1051 /**
1052  * Add port to open ports list.
1053  * @param list: list head. changed.
1054  * @param s: fd.
1055  * @param ftype: if fd is UDP.
1056  * @param ub_sock: socket with address.
1057  * @return false on failure. list in unchanged then.
1058  */
1059 static int
1060 port_insert(struct listen_port** list, int s, enum listen_type ftype, struct unbound_socket* ub_sock)
1061 {
1062         struct listen_port* item = (struct listen_port*)malloc(
1063                 sizeof(struct listen_port));
1064         if(!item)
1065                 return 0;
1066         item->next = *list;
1067         item->fd = s;
1068         item->ftype = ftype;
1069         item->socket = ub_sock;
1070         *list = item;
1071         return 1;
1072 }
1073
/**
 * Set fd to receive source address packet info.
 * Which socket option is used depends on what the platform defines.
 * @param s: fd.
 * @param family: AF_INET6 or AF_INET. Other values are accepted and
 *	nothing is set for them.
 * @return false if the option could not be set or if the platform has no
 *	suitable option for the family (an error is logged).
 */
static int
set_recvpktinfo(int s, int family) 
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int enable = 1;
#else
	(void)s;
#endif
	switch(family) {
	case AF_INET6:
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		return 1;
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		return 1;
#else
		log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#endif /* IPV6_RECVPKTINFO || IPV6_PKTINFO */

	case AF_INET:
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		return 1;
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		return 1;
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#endif /* IP_PKTINFO || IP_RECVDSTADDR */

	default:
		/* no option to set for other families */
		break;
	}
	return 1;
}
1128
1129 /** see if interface is ssl, its port number == the ssl port number */
1130 static int
1131 if_is_ssl(const char* ifname, const char* port, int ssl_port,
1132         struct config_strlist* tls_additional_port)
1133 {
1134         struct config_strlist* s;
1135         char* p = strchr(ifname, '@');
1136         if(!p && atoi(port) == ssl_port)
1137                 return 1;
1138         if(p && atoi(p+1) == ssl_port)
1139                 return 1;
1140         for(s = tls_additional_port; s; s = s->next) {
1141                 if(p && atoi(p+1) == atoi(s->str))
1142                         return 1;
1143                 if(!p && atoi(port) == atoi(s->str))
1144                         return 1;
1145         }
1146         return 0;
1147 }
1148
1149 /**
1150  * Helper for ports_open. Creates one interface (or NULL for default).
1151  * @param ifname: The interface ip address.
1152  * @param do_auto: use automatic interface detection.
1153  *      If enabled, then ifname must be the wildcard name.
1154  * @param do_udp: if udp should be used.
1155  * @param do_tcp: if udp should be used.
1156  * @param hints: for getaddrinfo. family and flags have to be set by caller.
1157  * @param port: Port number to use (as string).
1158  * @param list: list of open ports, appended to, changed to point to list head.
1159  * @param rcv: receive buffer size for UDP
1160  * @param snd: send buffer size for UDP
1161  * @param ssl_port: ssl service port number
1162  * @param tls_additional_port: list of additional ssl service port numbers.
1163  * @param https_port: DoH service port number
1164  * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
1165  *      set to false on exit if reuseport failed due to no kernel support.
1166  * @param transparent: set IP_TRANSPARENT socket option.
1167  * @param tcp_mss: maximum segment size of tcp socket. default if zero.
1168  * @param freebind: set IP_FREEBIND socket option.
1169  * @param http2_nodelay: set TCP_NODELAY on HTTP/2 connection
1170  * @param use_systemd: if true, fetch sockets from systemd.
1171  * @param dnscrypt_port: dnscrypt service port number
1172  * @param dscp: DSCP to use.
1173  * @return: returns false on error.
1174  */
1175 static int
1176 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, 
1177         struct addrinfo *hints, const char* port, struct listen_port** list,
1178         size_t rcv, size_t snd, int ssl_port,
1179         struct config_strlist* tls_additional_port, int https_port,
1180         int* reuseport, int transparent, int tcp_mss, int freebind,
1181         int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp)
1182 {
1183         int s, noip6=0;
1184         int is_https = if_is_https(ifname, port, https_port);
1185         int nodelay = is_https && http2_nodelay;
1186         struct unbound_socket* ub_sock;
1187 #ifdef USE_DNSCRYPT
1188         int is_dnscrypt = ((strchr(ifname, '@') && 
1189                         atoi(strchr(ifname, '@')+1) == dnscrypt_port) ||
1190                         (!strchr(ifname, '@') && atoi(port) == dnscrypt_port));
1191 #else
1192         int is_dnscrypt = 0;
1193         (void)dnscrypt_port;
1194 #endif
1195
1196         if(!do_udp && !do_tcp)
1197                 return 0;
1198
1199         if(do_auto) {
1200                 ub_sock = calloc(1, sizeof(struct unbound_socket));
1201                 if(!ub_sock)
1202                         return 0;
1203                 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 
1204                         &noip6, rcv, snd, reuseport, transparent,
1205                         tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
1206                         freeaddrinfo(ub_sock->addr);
1207                         free(ub_sock);
1208                         if(noip6) {
1209                                 log_warn("IPv6 protocol not available");
1210                                 return 1;
1211                         }
1212                         return 0;
1213                 }
1214                 /* getting source addr packet info is highly non-portable */
1215                 if(!set_recvpktinfo(s, hints->ai_family)) {
1216                         sock_close(s);
1217                         freeaddrinfo(ub_sock->addr);
1218                         free(ub_sock);
1219                         return 0;
1220                 }
1221                 if(!port_insert(list, s,
1222                    is_dnscrypt?listen_type_udpancil_dnscrypt:listen_type_udpancil, ub_sock)) {
1223                         sock_close(s);
1224                         freeaddrinfo(ub_sock->addr);
1225                         free(ub_sock);
1226                         return 0;
1227                 }
1228         } else if(do_udp) {
1229                 ub_sock = calloc(1, sizeof(struct unbound_socket));
1230                 if(!ub_sock)
1231                         return 0;
1232                 /* regular udp socket */
1233                 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 
1234                         &noip6, rcv, snd, reuseport, transparent,
1235                         tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
1236                         freeaddrinfo(ub_sock->addr);
1237                         free(ub_sock);
1238                         if(noip6) {
1239                                 log_warn("IPv6 protocol not available");
1240                                 return 1;
1241                         }
1242                         return 0;
1243                 }
1244                 if(!port_insert(list, s,
1245                    is_dnscrypt?listen_type_udp_dnscrypt:listen_type_udp, ub_sock)) {
1246                         sock_close(s);
1247                         freeaddrinfo(ub_sock->addr);
1248                         free(ub_sock);
1249                         return 0;
1250                 }
1251         }
1252         if(do_tcp) {
1253                 int is_ssl = if_is_ssl(ifname, port, ssl_port,
1254                         tls_additional_port);
1255                 enum listen_type port_type;
1256                 ub_sock = calloc(1, sizeof(struct unbound_socket));
1257                 if(!ub_sock)
1258                         return 0;
1259                 if(is_ssl)
1260                         port_type = listen_type_ssl;
1261                 else if(is_https)
1262                         port_type = listen_type_http;
1263                 else if(is_dnscrypt)
1264                         port_type = listen_type_tcp_dnscrypt;
1265                 else
1266                         port_type = listen_type_tcp;
1267                 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, 
1268                         &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
1269                         freebind, use_systemd, dscp, ub_sock)) == -1) {
1270                         freeaddrinfo(ub_sock->addr);
1271                         free(ub_sock);
1272                         if(noip6) {
1273                                 /*log_warn("IPv6 protocol not available");*/
1274                                 return 1;
1275                         }
1276                         return 0;
1277                 }
1278                 if(is_ssl)
1279                         verbose(VERB_ALGO, "setup TCP for SSL service");
1280                 if(!port_insert(list, s, port_type, ub_sock)) {
1281                         sock_close(s);
1282                         freeaddrinfo(ub_sock->addr);
1283                         free(ub_sock);
1284                         return 0;
1285                 }
1286         }
1287         return 1;
1288 }
1289
1290 /** 
1291  * Add items to commpoint list in front.
1292  * @param c: commpoint to add.
1293  * @param front: listen struct.
1294  * @return: false on failure.
1295  */
1296 static int
1297 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
1298 {
1299         struct listen_list* item = (struct listen_list*)malloc(
1300                 sizeof(struct listen_list));
1301         if(!item)
1302                 return 0;
1303         item->com = c;
1304         item->next = front->cps;
1305         front->cps = item;
1306         return 1;
1307 }
1308
1309 struct listen_dnsport* 
1310 listen_create(struct comm_base* base, struct listen_port* ports,
1311         size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
1312         int harden_large_queries, uint32_t http_max_streams,
1313         char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
1314         void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb,
1315         void *cb_arg)
1316 {
1317         struct listen_dnsport* front = (struct listen_dnsport*)
1318                 malloc(sizeof(struct listen_dnsport));
1319         if(!front)
1320                 return NULL;
1321         front->cps = NULL;
1322         front->udp_buff = sldns_buffer_new(bufsize);
1323 #ifdef USE_DNSCRYPT
1324         front->dnscrypt_udp_buff = NULL;
1325 #endif
1326         if(!front->udp_buff) {
1327                 free(front);
1328                 return NULL;
1329         }
1330         if(!stream_wait_lock_inited) {
1331                 lock_basic_init(&stream_wait_count_lock);
1332                 stream_wait_lock_inited = 1;
1333         }
1334         if(!http2_query_buffer_lock_inited) {
1335                 lock_basic_init(&http2_query_buffer_count_lock);
1336                 http2_query_buffer_lock_inited = 1;
1337         }
1338         if(!http2_response_buffer_lock_inited) {
1339                 lock_basic_init(&http2_response_buffer_count_lock);
1340                 http2_response_buffer_lock_inited = 1;
1341         }
1342
1343         /* create comm points as needed */
1344         while(ports) {
1345                 struct comm_point* cp = NULL;
1346                 if(ports->ftype == listen_type_udp ||
1347                    ports->ftype == listen_type_udp_dnscrypt)
1348                         cp = comm_point_create_udp(base, ports->fd, 
1349                                 front->udp_buff, cb, cb_arg, ports->socket);
1350                 else if(ports->ftype == listen_type_tcp ||
1351                                 ports->ftype == listen_type_tcp_dnscrypt)
1352                         cp = comm_point_create_tcp(base, ports->fd, 
1353                                 tcp_accept_count, tcp_idle_timeout,
1354                                 harden_large_queries, 0, NULL,
1355                                 tcp_conn_limit, bufsize, front->udp_buff,
1356                                 ports->ftype, cb, cb_arg, ports->socket);
1357                 else if(ports->ftype == listen_type_ssl ||
1358                         ports->ftype == listen_type_http) {
1359                         cp = comm_point_create_tcp(base, ports->fd, 
1360                                 tcp_accept_count, tcp_idle_timeout,
1361                                 harden_large_queries,
1362                                 http_max_streams, http_endpoint,
1363                                 tcp_conn_limit, bufsize, front->udp_buff,
1364                                 ports->ftype, cb, cb_arg, ports->socket);
1365                         if(http_notls && ports->ftype == listen_type_http)
1366                                 cp->ssl = NULL;
1367                         else
1368                                 cp->ssl = sslctx;
1369                         if(ports->ftype == listen_type_http) {
1370                                 if(!sslctx && !http_notls) {
1371                                   log_warn("HTTPS port configured, but no TLS "
1372                                         "tls-service-key or tls-service-pem "
1373                                         "set");
1374                                 }
1375 #ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
1376                                 if(!http_notls)
1377                                   log_warn("Unbound is not compiled with an "
1378                                         "OpenSSL version supporting ALPN "
1379                                         " (OpenSSL >= 1.0.2). This is required "
1380                                         "to use DNS-over-HTTPS");
1381 #endif
1382 #ifndef HAVE_NGHTTP2_NGHTTP2_H
1383                                 log_warn("Unbound is not compiled with "
1384                                         "nghttp2. This is required to use "
1385                                         "DNS-over-HTTPS.");
1386 #endif
1387                         }
1388                 } else if(ports->ftype == listen_type_udpancil ||
1389                                   ports->ftype == listen_type_udpancil_dnscrypt)
1390                         cp = comm_point_create_udp_ancil(base, ports->fd, 
1391                                 front->udp_buff, cb, cb_arg, ports->socket);
1392                 if(!cp) {
1393                         log_err("can't create commpoint");      
1394                         listen_delete(front);
1395                         return NULL;
1396                 }
1397                 cp->dtenv = dtenv;
1398                 cp->do_not_close = 1;
1399 #ifdef USE_DNSCRYPT
1400                 if (ports->ftype == listen_type_udp_dnscrypt ||
1401                         ports->ftype == listen_type_tcp_dnscrypt ||
1402                         ports->ftype == listen_type_udpancil_dnscrypt) {
1403                         cp->dnscrypt = 1;
1404                         cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
1405                         if(!cp->dnscrypt_buffer) {
1406                                 log_err("can't alloc dnscrypt_buffer");
1407                                 comm_point_delete(cp);
1408                                 listen_delete(front);
1409                                 return NULL;
1410                         }
1411                         front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
1412                 }
1413 #endif
1414                 if(!listen_cp_insert(cp, front)) {
1415                         log_err("malloc failed");
1416                         comm_point_delete(cp);
1417                         listen_delete(front);
1418                         return NULL;
1419                 }
1420                 ports = ports->next;
1421         }
1422         if(!front->cps) {
1423                 log_err("Could not open sockets to accept queries.");
1424                 listen_delete(front);
1425                 return NULL;
1426         }
1427
1428         return front;
1429 }
1430
1431 void
1432 listen_list_delete(struct listen_list* list)
1433 {
1434         struct listen_list *p = list, *pn;
1435         while(p) {
1436                 pn = p->next;
1437                 comm_point_delete(p->com);
1438                 free(p);
1439                 p = pn;
1440         }
1441 }
1442
1443 void 
1444 listen_delete(struct listen_dnsport* front)
1445 {
1446         if(!front) 
1447                 return;
1448         listen_list_delete(front->cps);
1449 #ifdef USE_DNSCRYPT
1450         if(front->dnscrypt_udp_buff &&
1451                 front->udp_buff != front->dnscrypt_udp_buff) {
1452                 sldns_buffer_free(front->dnscrypt_udp_buff);
1453         }
1454 #endif
1455         sldns_buffer_free(front->udp_buff);
1456         free(front);
1457         if(stream_wait_lock_inited) {
1458                 stream_wait_lock_inited = 0;
1459                 lock_basic_destroy(&stream_wait_count_lock);
1460         }
1461         if(http2_query_buffer_lock_inited) {
1462                 http2_query_buffer_lock_inited = 0;
1463                 lock_basic_destroy(&http2_query_buffer_count_lock);
1464         }
1465         if(http2_response_buffer_lock_inited) {
1466                 http2_response_buffer_lock_inited = 0;
1467                 lock_basic_destroy(&http2_response_buffer_count_lock);
1468         }
1469 }
1470
#ifdef HAVE_GETIFADDRS
/** Grow the string array by one slot and store a strdup'ed copy of str
 * in it.  Returns 0 (after logging) when realloc or strdup fails; the
 * element count is only incremented on success. */
static int
resolve_ifa_append(char ***ip_addresses, int *ip_addresses_size,
        const char *str)
{
        void *grown = realloc(*ip_addresses,
                sizeof(char *) * (*ip_addresses_size + 1));
        if(!grown) {
                log_err("realloc failed: out of memory");
                return 0;
        }
        *ip_addresses = grown;
        (*ip_addresses)[*ip_addresses_size] = strdup(str);
        if(!(*ip_addresses)[*ip_addresses_size]) {
                log_err("strdup failed: out of memory");
                return 0;
        }
        (*ip_addresses_size)++;
        return 1;
}

/** Resolve one configured interface entry ("name" or "name@port")
 * against the getifaddrs list.  Every IPv4 (and, with INET6, IPv6)
 * address of an interface with that name is appended to the array as
 * "addr[%scope][@port]".  When nothing matched, the entry itself is
 * appended unchanged.  Returns 1 on success, 0 on failure. */
static int
resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addresses, int *ip_addresses_size)
{
        struct ifaddrs *ifa;
        const int size_on_entry = *ip_addresses_size;
        /* the "@port" suffix (if any) is the same for every candidate */
        const char *at = strrchr(search_ifa, '@');
        const char *suffix = at ? at : "";
        const size_t name_len = at ? (size_t)(at - search_ifa)
                : strlen(search_ifa);

        for(ifa = ifas; ifa != NULL; ifa = ifa->ifa_next) {
                sa_family_t family;
#ifdef INET6      /* |   address ip    | % |  ifa name  | @ |  port  | nul */
                char addr_buf[INET6_ADDRSTRLEN + 1 + IF_NAMESIZE + 1 + 16 + 1];
#else
                char addr_buf[INET_ADDRSTRLEN + 1 + 16 + 1];
#endif

                /* the interface name must equal the part before '@' */
                if(strlen(ifa->ifa_name) != name_len
                   || strncmp(ifa->ifa_name, search_ifa, name_len) != 0)
                        continue;
                if(ifa->ifa_addr == NULL)
                        continue;

                family = ifa->ifa_addr->sa_family;
                if(family == AF_INET) {
                        char a4[INET_ADDRSTRLEN + 1];
                        struct sockaddr_in *in4 = (struct sockaddr_in *)
                                ifa->ifa_addr;
                        if(!inet_ntop(family, &in4->sin_addr, a4, sizeof(a4))) {
                                log_err("inet_ntop failed");
                                return 0;
                        }
                        snprintf(addr_buf, sizeof(addr_buf), "%s%s",
                                a4, suffix);
                }
#ifdef INET6
                else if(family == AF_INET6) {
                        char a6[INET6_ADDRSTRLEN + 1];
                        char scope_name[IF_NAMESIZE + 1];
                        struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)
                                ifa->ifa_addr;
                        if(!inet_ntop(family, &in6->sin6_addr, a6, sizeof(a6))) {
                                log_err("inet_ntop failed");
                                return 0;
                        }
                        /* stays empty when the scope id maps to no name */
                        scope_name[0] = 0;
                        (void)if_indextoname(in6->sin6_scope_id, scope_name);
                        if(scope_name[0] != 0) {
                                snprintf(addr_buf, sizeof(addr_buf),
                                        "%s%%%s%s", a6, scope_name, suffix);
                        } else {
                                snprintf(addr_buf, sizeof(addr_buf), "%s%s",
                                        a6, suffix);
                        }
                }
#endif
                else {
                        /* other address families are not listed */
                        continue;
                }
                verbose(4, "interface %s has address %s", search_ifa, addr_buf);

                if(!resolve_ifa_append(ip_addresses, ip_addresses_size,
                        addr_buf))
                        return 0;
        }

        /* nothing was added: pass the configured string through as is */
        if(*ip_addresses_size == size_on_entry) {
                if(!resolve_ifa_append(ip_addresses, ip_addresses_size,
                        search_ifa))
                        return 0;
        }
        return 1;
}
#endif /* HAVE_GETIFADDRS */
1574
1575 int resolve_interface_names(char** ifs, int num_ifs,
1576         struct config_strlist* list, char*** resif, int* num_resif)
1577 {
1578 #ifdef HAVE_GETIFADDRS
1579         struct ifaddrs *addrs = NULL;
1580         if(num_ifs == 0 && list == NULL) {
1581                 *resif = NULL;
1582                 *num_resif = 0;
1583                 return 1;
1584         }
1585         if(getifaddrs(&addrs) == -1) {
1586                 log_err("failed to list interfaces: getifaddrs: %s",
1587                         strerror(errno));
1588                 freeifaddrs(addrs);
1589                 return 0;
1590         }
1591         if(ifs) {
1592                 int i;
1593                 for(i=0; i<num_ifs; i++) {
1594                         if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) {
1595                                 freeifaddrs(addrs);
1596                                 config_del_strarray(*resif, *num_resif);
1597                                 *resif = NULL;
1598                                 *num_resif = 0;
1599                                 return 0;
1600                         }
1601                 }
1602         }
1603         if(list) {
1604                 struct config_strlist* p;
1605                 for(p = list; p; p = p->next) {
1606                         if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) {
1607                                 freeifaddrs(addrs);
1608                                 config_del_strarray(*resif, *num_resif);
1609                                 *resif = NULL;
1610                                 *num_resif = 0;
1611                                 return 0;
1612                         }
1613 }
1614         }
1615         freeifaddrs(addrs);
1616         return 1;
1617 #else
1618         struct config_strlist* p;
1619         if(num_ifs == 0 && list == NULL) {
1620                 *resif = NULL;
1621                 *num_resif = 0;
1622                 return 1;
1623         }
1624         *num_resif = num_ifs;
1625         for(p = list; p; p = p->next) {
1626                 (*num_resif)++;
1627         }
1628         *resif = calloc(*num_resif, sizeof(**resif));
1629         if(!*resif) {
1630                 log_err("out of memory");
1631                 return 0;
1632         }
1633         if(ifs) {
1634                 int i;
1635                 for(i=0; i<num_ifs; i++) {
1636                         (*resif)[i] = strdup(ifs[i]);
1637                         if(!((*resif)[i])) {
1638                                 log_err("out of memory");
1639                                 config_del_strarray(*resif, *num_resif);
1640                                 *resif = NULL;
1641                                 *num_resif = 0;
1642                                 return 0;
1643                         }
1644                 }
1645         }
1646         if(list) {
1647                 int idx = num_ifs;
1648                 for(p = list; p; p = p->next) {
1649                         (*resif)[idx] = strdup(p->str);
1650                         if(!((*resif)[idx])) {
1651                                 log_err("out of memory");
1652                                 config_del_strarray(*resif, *num_resif);
1653                                 *resif = NULL;
1654                                 *num_resif = 0;
1655                                 return 0;
1656                         }
1657                         idx++;
1658                 }
1659         }
1660         return 1;
1661 #endif /* HAVE_GETIFADDRS */
1662 }
1663
1664 struct listen_port* 
1665 listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
1666         int* reuseport)
1667 {
1668         struct listen_port* list = NULL;
1669         struct addrinfo hints;
1670         int i, do_ip4, do_ip6;
1671         int do_tcp, do_auto;
1672         char portbuf[32];
1673         snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1674         do_ip4 = cfg->do_ip4;
1675         do_ip6 = cfg->do_ip6;
1676         do_tcp = cfg->do_tcp;
1677         do_auto = cfg->if_automatic && cfg->do_udp;
1678         if(cfg->incoming_num_tcp == 0)
1679                 do_tcp = 0;
1680
1681         /* getaddrinfo */
1682         memset(&hints, 0, sizeof(hints));
1683         hints.ai_flags = AI_PASSIVE;
1684         /* no name lookups on our listening ports */
1685         if(num_ifs > 0)
1686                 hints.ai_flags |= AI_NUMERICHOST;
1687         hints.ai_family = AF_UNSPEC;
1688 #ifndef INET6
1689         do_ip6 = 0;
1690 #endif
1691         if(!do_ip4 && !do_ip6) {
1692                 return NULL;
1693         }
1694         /* create ip4 and ip6 ports so that return addresses are nice. */
1695         if(do_auto || num_ifs == 0) {
1696                 if(do_ip6) {
1697                         hints.ai_family = AF_INET6;
1698                         if(!ports_create_if(do_auto?"::0":"::1", 
1699                                 do_auto, cfg->do_udp, do_tcp, 
1700                                 &hints, portbuf, &list,
1701                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1702                                 cfg->ssl_port, cfg->tls_additional_port,
1703                                 cfg->https_port, reuseport, cfg->ip_transparent,
1704                                 cfg->tcp_mss, cfg->ip_freebind,
1705                                 cfg->http_nodelay, cfg->use_systemd,
1706                                 cfg->dnscrypt_port, cfg->ip_dscp)) {
1707                                 listening_ports_free(list);
1708                                 return NULL;
1709                         }
1710                 }
1711                 if(do_ip4) {
1712                         hints.ai_family = AF_INET;
1713                         if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1", 
1714                                 do_auto, cfg->do_udp, do_tcp, 
1715                                 &hints, portbuf, &list,
1716                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1717                                 cfg->ssl_port, cfg->tls_additional_port,
1718                                 cfg->https_port, reuseport, cfg->ip_transparent,
1719                                 cfg->tcp_mss, cfg->ip_freebind,
1720                                 cfg->http_nodelay, cfg->use_systemd,
1721                                 cfg->dnscrypt_port, cfg->ip_dscp)) {
1722                                 listening_ports_free(list);
1723                                 return NULL;
1724                         }
1725                 }
1726         } else for(i = 0; i<num_ifs; i++) {
1727                 if(str_is_ip6(ifs[i])) {
1728                         if(!do_ip6)
1729                                 continue;
1730                         hints.ai_family = AF_INET6;
1731                         if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1732                                 do_tcp, &hints, portbuf, &list, 
1733                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1734                                 cfg->ssl_port, cfg->tls_additional_port,
1735                                 cfg->https_port, reuseport, cfg->ip_transparent,
1736                                 cfg->tcp_mss, cfg->ip_freebind,
1737                                 cfg->http_nodelay, cfg->use_systemd,
1738                                 cfg->dnscrypt_port, cfg->ip_dscp)) {
1739                                 listening_ports_free(list);
1740                                 return NULL;
1741                         }
1742                 } else {
1743                         if(!do_ip4)
1744                                 continue;
1745                         hints.ai_family = AF_INET;
1746                         if(!ports_create_if(ifs[i], 0, cfg->do_udp,
1747                                 do_tcp, &hints, portbuf, &list, 
1748                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1749                                 cfg->ssl_port, cfg->tls_additional_port,
1750                                 cfg->https_port, reuseport, cfg->ip_transparent,
1751                                 cfg->tcp_mss, cfg->ip_freebind,
1752                                 cfg->http_nodelay, cfg->use_systemd,
1753                                 cfg->dnscrypt_port, cfg->ip_dscp)) {
1754                                 listening_ports_free(list);
1755                                 return NULL;
1756                         }
1757                 }
1758         }
1759
1760         return list;
1761 }
1762
1763 void listening_ports_free(struct listen_port* list)
1764 {
1765         struct listen_port* nx;
1766         while(list) {
1767                 nx = list->next;
1768                 if(list->fd != -1) {
1769                         sock_close(list->fd);
1770                 }
1771                 /* rc_ports don't have ub_socket */
1772                 if(list->socket) {
1773                         freeaddrinfo(list->socket->addr);
1774                         free(list->socket);
1775                 }
1776                 free(list);
1777                 list = nx;
1778         }
1779 }
1780
1781 size_t listen_get_mem(struct listen_dnsport* listen)
1782 {
1783         struct listen_list* p;
1784         size_t s = sizeof(*listen) + sizeof(*listen->base) + 
1785                 sizeof(*listen->udp_buff) + 
1786                 sldns_buffer_capacity(listen->udp_buff);
1787 #ifdef USE_DNSCRYPT
1788         s += sizeof(*listen->dnscrypt_udp_buff);
1789         if(listen->udp_buff != listen->dnscrypt_udp_buff){
1790                 s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
1791         }
1792 #endif
1793         for(p = listen->cps; p; p = p->next) {
1794                 s += sizeof(*p);
1795                 s += comm_point_get_mem(p->com);
1796         }
1797         return s;
1798 }
1799
1800 void listen_stop_accept(struct listen_dnsport* listen)
1801 {
1802         /* do not stop the ones that have no tcp_free list
1803          * (they have already stopped listening) */
1804         struct listen_list* p;
1805         for(p=listen->cps; p; p=p->next) {
1806                 if(p->com->type == comm_tcp_accept &&
1807                         p->com->tcp_free != NULL) {
1808                         comm_point_stop_listening(p->com);
1809                 }
1810         }
1811 }
1812
1813 void listen_start_accept(struct listen_dnsport* listen)
1814 {
1815         /* do not start the ones that have no tcp_free list, it is no
1816          * use to listen to them because they have no free tcp handlers */
1817         struct listen_list* p;
1818         for(p=listen->cps; p; p=p->next) {
1819                 if(p->com->type == comm_tcp_accept &&
1820                         p->com->tcp_free != NULL) {
1821                         comm_point_start_listening(p->com, -1, -1);
1822                 }
1823         }
1824 }
1825
1826 struct tcp_req_info*
1827 tcp_req_info_create(struct sldns_buffer* spoolbuf)
1828 {
1829         struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
1830         if(!req) {
1831                 log_err("malloc failure for new stream outoforder processing structure");
1832                 return NULL;
1833         }
1834         memset(req, 0, sizeof(*req));
1835         req->spool_buffer = spoolbuf;
1836         return req;
1837 }
1838
/* Clear the pending lists of a tcp_req_info and free the structure.
 * NULL is ignored. */
void
tcp_req_info_delete(struct tcp_req_info* req)
{
        if(req == NULL)
                return;
        tcp_req_info_clear(req);
        /* req->cp points back to the commpoint that owns this struct and
         * called delete on us, so it is left alone */
        /* spool_buffer is shared udp buffer, not deleted here */
        free(req);
}
1849
1850 void tcp_req_info_clear(struct tcp_req_info* req)
1851 {
1852         struct tcp_req_open_item* open, *nopen;
1853         struct tcp_req_done_item* item, *nitem;
1854         if(!req) return;
1855
1856         /* free outstanding request mesh reply entries */
1857         open = req->open_req_list;
1858         while(open) {
1859                 nopen = open->next;
1860                 mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
1861                 free(open);
1862                 open = nopen;
1863         }
1864         req->open_req_list = NULL;
1865         req->num_open_req = 0;
1866         
1867         /* free pending writable result packets */
1868         item = req->done_req_list;
1869         while(item) {
1870                 nitem = item->next;
1871                 lock_basic_lock(&stream_wait_count_lock);
1872                 stream_wait_count -= (sizeof(struct tcp_req_done_item)
1873                         +item->len);
1874                 lock_basic_unlock(&stream_wait_count_lock);
1875                 free(item->buf);
1876                 free(item);
1877                 item = nitem;
1878         }
1879         req->done_req_list = NULL;
1880         req->num_done_req = 0;
1881         req->read_is_closed = 0;
1882 }
1883
1884 void
1885 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
1886 {
1887         struct tcp_req_open_item* open, *prev = NULL;
1888         if(!req || !m) return;
1889         open = req->open_req_list;
1890         while(open) {
1891                 if(open->mesh_state == m) {
1892                         struct tcp_req_open_item* next;
1893                         if(prev) prev->next = open->next;
1894                         else req->open_req_list = open->next;
1895                         /* caller has to manage the mesh state reply entry */
1896                         next = open->next;
1897                         free(open);
1898                         req->num_open_req --;
1899
1900                         /* prev = prev; */
1901                         open = next;
1902                         continue;
1903                 }
1904                 prev = open;
1905                 open = open->next;
1906         }
1907 }
1908
1909 /** setup listening for read or write */
1910 static void
1911 tcp_req_info_setup_listen(struct tcp_req_info* req)
1912 {
1913         int wr = 0;
1914         int rd = 0;
1915
1916         if(req->cp->tcp_byte_count != 0) {
1917                 /* cannot change, halfway through */
1918                 return;
1919         }
1920
1921         if(!req->cp->tcp_is_reading)
1922                 wr = 1;
1923         if(!req->read_is_closed)
1924                 rd = 1;
1925         
1926         if(wr) {
1927                 req->cp->tcp_is_reading = 0;
1928                 comm_point_stop_listening(req->cp);
1929                 comm_point_start_listening(req->cp, -1,
1930                         adjusted_tcp_timeout(req->cp));
1931         } else if(rd) {
1932                 req->cp->tcp_is_reading = 1;
1933                 comm_point_stop_listening(req->cp);
1934                 comm_point_start_listening(req->cp, -1,
1935                         adjusted_tcp_timeout(req->cp));
1936                 /* and also read it (from SSL stack buffers), so
1937                  * no event read event is expected since the remainder of
1938                  * the TLS frame is sitting in the buffers. */
1939                 req->read_again = 1;
1940         } else {
1941                 comm_point_stop_listening(req->cp);
1942                 comm_point_start_listening(req->cp, -1,
1943                         adjusted_tcp_timeout(req->cp));
1944                 comm_point_listen_for_rw(req->cp, 0, 0);
1945         }
1946 }
1947
1948 /** remove first item from list of pending results */
1949 static struct tcp_req_done_item*
1950 tcp_req_info_pop_done(struct tcp_req_info* req)
1951 {
1952         struct tcp_req_done_item* item;
1953         log_assert(req->num_done_req > 0 && req->done_req_list);
1954         item = req->done_req_list;
1955         lock_basic_lock(&stream_wait_count_lock);
1956         stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
1957         lock_basic_unlock(&stream_wait_count_lock);
1958         req->done_req_list = req->done_req_list->next;
1959         req->num_done_req --;
1960         return item;
1961 }
1962
1963 /** Send given buffer and setup to write */
1964 static void
1965 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
1966         size_t len)
1967 {
1968         sldns_buffer_clear(req->cp->buffer);
1969         sldns_buffer_write(req->cp->buffer, buf, len);
1970         sldns_buffer_flip(req->cp->buffer);
1971
1972         req->cp->tcp_is_reading = 0; /* we are now writing */
1973 }
1974
1975 /** pick up the next result and start writing it to the channel */
1976 static void
1977 tcp_req_pickup_next_result(struct tcp_req_info* req)
1978 {
1979         if(req->num_done_req > 0) {
1980                 /* unlist the done item from the list of pending results */
1981                 struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
1982                 tcp_req_info_start_write_buf(req, item->buf, item->len);
1983                 free(item->buf);
1984                 free(item);
1985         }
1986 }
1987
1988 /** the read channel has closed */
1989 int
1990 tcp_req_info_handle_read_close(struct tcp_req_info* req)
1991 {
1992         verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
1993         /* reset byte count for (potential) partial read */
1994         req->cp->tcp_byte_count = 0;
1995         /* if we still have results to write, pick up next and write it */
1996         if(req->num_done_req != 0) {
1997                 tcp_req_pickup_next_result(req);
1998                 tcp_req_info_setup_listen(req);
1999                 return 1;
2000         }
2001         /* if nothing to do, this closes the connection */
2002         if(req->num_open_req == 0 && req->num_done_req == 0)
2003                 return 0;
2004         /* otherwise, we must be waiting for dns resolve, wait with timeout */
2005         req->read_is_closed = 1;
2006         tcp_req_info_setup_listen(req);
2007         return 1;
2008 }
2009
2010 void
2011 tcp_req_info_handle_writedone(struct tcp_req_info* req)
2012 {
2013         /* back to reading state, we finished this write event */
2014         sldns_buffer_clear(req->cp->buffer);
2015         if(req->num_done_req == 0 && req->read_is_closed) {
2016                 /* no more to write and nothing to read, close it */
2017                 comm_point_drop_reply(&req->cp->repinfo);
2018                 return;
2019         }
2020         req->cp->tcp_is_reading = 1;
2021         /* see if another result needs writing */
2022         tcp_req_pickup_next_result(req);
2023
2024         /* see if there is more to write, if not stop_listening for writing */
2025         /* see if new requests are allowed, if so, start_listening
2026          * for reading */
2027         tcp_req_info_setup_listen(req);
2028 }
2029
2030 void
2031 tcp_req_info_handle_readdone(struct tcp_req_info* req)
2032 {
2033         struct comm_point* c = req->cp;
2034
2035         /* we want to read up several requests, unless there are
2036          * pending answers */
2037
2038         req->is_drop = 0;
2039         req->is_reply = 0;
2040         req->in_worker_handle = 1;
2041         sldns_buffer_set_limit(req->spool_buffer, 0);
2042         /* handle the current request */
2043         /* this calls the worker handle request routine that could give
2044          * a cache response, or localdata response, or drop the reply,
2045          * or schedule a mesh entry for later */
2046         fptr_ok(fptr_whitelist_comm_point(c->callback));
2047         if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
2048                 req->in_worker_handle = 0;
2049                 /* there is an answer, put it up.  It is already in the
2050                  * c->buffer, just send it. */
2051                 /* since we were just reading a query, the channel is
2052                  * clear to write to */
2053         send_it:
2054                 c->tcp_is_reading = 0;
2055                 comm_point_stop_listening(c);
2056                 comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
2057                 return;
2058         }
2059         req->in_worker_handle = 0;
2060         /* it should be waiting in the mesh for recursion.
2061          * If mesh failed to add a new entry and called commpoint_drop_reply. 
2062          * Then the mesh state has been cleared. */
2063         if(req->is_drop) {
2064                 /* the reply has been dropped, stream has been closed. */
2065                 return;
2066         }
2067         /* If mesh failed(mallocfail) and called commpoint_send_reply with
2068          * something like servfail then we pick up that reply below. */
2069         if(req->is_reply) {
2070                 goto send_it;
2071         }
2072
2073         sldns_buffer_clear(c->buffer);
2074         /* if pending answers, pick up an answer and start sending it */
2075         tcp_req_pickup_next_result(req);
2076
2077         /* if answers pending, start sending answers */
2078         /* read more requests if we can have more requests */
2079         tcp_req_info_setup_listen(req);
2080 }
2081
2082 int
2083 tcp_req_info_add_meshstate(struct tcp_req_info* req,
2084         struct mesh_area* mesh, struct mesh_state* m)
2085 {
2086         struct tcp_req_open_item* item;
2087         log_assert(req && mesh && m);
2088         item = (struct tcp_req_open_item*)malloc(sizeof(*item));
2089         if(!item) return 0;
2090         item->next = req->open_req_list;
2091         item->mesh = mesh;
2092         item->mesh_state = m;
2093         req->open_req_list = item;
2094         req->num_open_req++;
2095         return 1;
2096 }
2097
2098 /** Add a result to the result list.  At the end. */
2099 static int
2100 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len)
2101 {
2102         struct tcp_req_done_item* last = NULL;
2103         struct tcp_req_done_item* item;
2104         size_t space;
2105
2106         /* see if we have space */
2107         space = sizeof(struct tcp_req_done_item) + len;
2108         lock_basic_lock(&stream_wait_count_lock);
2109         if(stream_wait_count + space > stream_wait_max) {
2110                 lock_basic_unlock(&stream_wait_count_lock);
2111                 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size");
2112                 return 0;
2113         }
2114         stream_wait_count += space;
2115         lock_basic_unlock(&stream_wait_count_lock);
2116
2117         /* find last element */
2118         last = req->done_req_list;
2119         while(last && last->next)
2120                 last = last->next;
2121         
2122         /* create new element */
2123         item = (struct tcp_req_done_item*)malloc(sizeof(*item));
2124         if(!item) {
2125                 log_err("malloc failure, for stream result list");
2126                 return 0;
2127         }
2128         item->next = NULL;
2129         item->len = len;
2130         item->buf = memdup(buf, len);
2131         if(!item->buf) {
2132                 free(item);
2133                 log_err("malloc failure, adding reply to stream result list");
2134                 return 0;
2135         }
2136
2137         /* link in */
2138         if(last) last->next = item;
2139         else req->done_req_list = item;
2140         req->num_done_req++;
2141         return 1;
2142 }
2143
2144 void
2145 tcp_req_info_send_reply(struct tcp_req_info* req)
2146 {
2147         if(req->in_worker_handle) {
2148                 /* reply from mesh is in the spool_buffer */
2149                 /* copy now, so that the spool buffer is free for other tasks
2150                  * before the callback is done */
2151                 sldns_buffer_clear(req->cp->buffer);
2152                 sldns_buffer_write(req->cp->buffer,
2153                         sldns_buffer_begin(req->spool_buffer),
2154                         sldns_buffer_limit(req->spool_buffer));
2155                 sldns_buffer_flip(req->cp->buffer);
2156                 req->is_reply = 1;
2157                 return;
2158         }
2159         /* now that the query has been handled, that mesh_reply entry
2160          * should be removed, from the tcp_req_info list,
2161          * the mesh state cleanup removes then with region_cleanup and
2162          * replies_sent true. */
2163         /* see if we can send it straight away (we are not doing
2164          * anything else).  If so, copy to buffer and start */
2165         if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) {
2166                 /* buffer is free, and was ready to read new query into,
2167                  * but we are now going to use it to send this answer */
2168                 tcp_req_info_start_write_buf(req,
2169                         sldns_buffer_begin(req->spool_buffer),
2170                         sldns_buffer_limit(req->spool_buffer));
2171                 /* switch to listen to write events */
2172                 comm_point_stop_listening(req->cp);
2173                 comm_point_start_listening(req->cp, -1,
2174                         adjusted_tcp_timeout(req->cp));
2175                 return;
2176         }
2177         /* queue up the answer behind the others already pending */
2178         if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer),
2179                 sldns_buffer_limit(req->spool_buffer))) {
2180                 /* drop the connection, we are out of resources */
2181                 comm_point_drop_reply(&req->cp->repinfo);
2182         }
2183 }
2184
2185 size_t tcp_req_info_get_stream_buffer_size(void)
2186 {
2187         size_t s;
2188         if(!stream_wait_lock_inited)
2189                 return stream_wait_count;
2190         lock_basic_lock(&stream_wait_count_lock);
2191         s = stream_wait_count;
2192         lock_basic_unlock(&stream_wait_count_lock);
2193         return s;
2194 }
2195
2196 size_t http2_get_query_buffer_size(void)
2197 {
2198         size_t s;
2199         if(!http2_query_buffer_lock_inited)
2200                 return http2_query_buffer_count;
2201         lock_basic_lock(&http2_query_buffer_count_lock);
2202         s = http2_query_buffer_count;
2203         lock_basic_unlock(&http2_query_buffer_count_lock);
2204         return s;
2205 }
2206
2207 size_t http2_get_response_buffer_size(void)
2208 {
2209         size_t s;
2210         if(!http2_response_buffer_lock_inited)
2211                 return http2_response_buffer_count;
2212         lock_basic_lock(&http2_response_buffer_count_lock);
2213         s = http2_response_buffer_count;
2214         lock_basic_unlock(&http2_response_buffer_count_lock);
2215         return s;
2216 }
2217
2218 #ifdef HAVE_NGHTTP2
2219 /** nghttp2 callback. Used to copy response from rbuffer to nghttp2 session */
2220 static ssize_t http2_submit_response_read_callback(
2221         nghttp2_session* ATTR_UNUSED(session),
2222         int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2223         nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2224 {
2225         struct http2_stream* h2_stream;
2226         struct http2_session* h2_session = source->ptr;
2227         size_t copylen = length;
2228         if(!(h2_stream = nghttp2_session_get_stream_user_data(
2229                 h2_session->session, stream_id))) {
2230                 verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2231                         "stream");
2232                 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2233         }
2234         if(!h2_stream->rbuffer ||
2235                 sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2236                 verbose(VERB_QUERY, "http2: cannot submit buffer. No data "
2237                         "available in rbuffer");
2238                 /* rbuffer will be free'd in frame close cb */
2239                 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2240         }
2241
2242         if(copylen > sldns_buffer_remaining(h2_stream->rbuffer))
2243                 copylen = sldns_buffer_remaining(h2_stream->rbuffer);
2244         if(copylen > SSIZE_MAX)
2245                 copylen = SSIZE_MAX; /* will probably never happen */
2246
2247         memcpy(buf, sldns_buffer_current(h2_stream->rbuffer), copylen);
2248         sldns_buffer_skip(h2_stream->rbuffer, copylen);
2249
2250         if(sldns_buffer_remaining(h2_stream->rbuffer) == 0) {
2251                 *data_flags |= NGHTTP2_DATA_FLAG_EOF;
2252                 lock_basic_lock(&http2_response_buffer_count_lock);
2253                 http2_response_buffer_count -=
2254                         sldns_buffer_capacity(h2_stream->rbuffer);
2255                 lock_basic_unlock(&http2_response_buffer_count_lock);
2256                 sldns_buffer_free(h2_stream->rbuffer);
2257                 h2_stream->rbuffer = NULL;
2258         }
2259
2260         return copylen;
2261 }
2262
2263 /**
2264  * Send RST_STREAM frame for stream.
2265  * @param h2_session: http2 session to submit frame to
2266  * @param h2_stream: http2 stream containing frame ID to use in RST_STREAM
2267  * @return 0 on error, 1 otherwise
2268  */
2269 static int http2_submit_rst_stream(struct http2_session* h2_session,
2270                 struct http2_stream* h2_stream)
2271 {
2272         int ret = nghttp2_submit_rst_stream(h2_session->session,
2273                 NGHTTP2_FLAG_NONE, h2_stream->stream_id,
2274                 NGHTTP2_INTERNAL_ERROR);
2275         if(ret) {
2276                 verbose(VERB_QUERY, "http2: nghttp2_submit_rst_stream failed, "
2277                         "error: %s", nghttp2_strerror(ret));
2278                 return 0;
2279         }
2280         return 1;
2281 }
2282
2283 /**
2284  * DNS response ready to be submitted to nghttp2, to be prepared for sending
2285  * out. Response is stored in c->buffer. Copy to rbuffer because the c->buffer
2286  * might be used before this will be sent out.
2287  * @param h2_session: http2 session, containing c->buffer which contains answer
2288  * @return 0 on error, 1 otherwise
2289  */
2290 int http2_submit_dns_response(struct http2_session* h2_session)
2291 {
2292         int ret;
2293         nghttp2_data_provider data_prd;
2294         char status[4];
2295         nghttp2_nv headers[3];
2296         struct http2_stream* h2_stream = h2_session->c->h2_stream;
2297         size_t rlen;
2298         char rlen_str[32];
2299
2300         if(h2_stream->rbuffer) {
2301                 log_err("http2 submit response error: rbuffer already "
2302                         "exists");
2303                 return 0;
2304         }
2305         if(sldns_buffer_remaining(h2_session->c->buffer) == 0) {
2306                 log_err("http2 submit response error: c->buffer not complete");
2307                 return 0;
2308         }
2309
2310         if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2311                 verbose(VERB_QUERY, "http2: submit response error: "
2312                         "invalid status");
2313                 return 0;
2314         }
2315
2316         rlen = sldns_buffer_remaining(h2_session->c->buffer);
2317         snprintf(rlen_str, sizeof(rlen_str), "%u", (unsigned)rlen);
2318
2319         lock_basic_lock(&http2_response_buffer_count_lock);
2320         if(http2_response_buffer_count + rlen > http2_response_buffer_max) {
2321                 lock_basic_unlock(&http2_response_buffer_count_lock);
2322                 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2323                         "in https-response-buffer-size");
2324                 return http2_submit_rst_stream(h2_session, h2_stream);
2325         }
2326         http2_response_buffer_count += rlen;
2327         lock_basic_unlock(&http2_response_buffer_count_lock);
2328
2329         if(!(h2_stream->rbuffer = sldns_buffer_new(rlen))) {
2330                 lock_basic_lock(&http2_response_buffer_count_lock);
2331                 http2_response_buffer_count -= rlen;
2332                 lock_basic_unlock(&http2_response_buffer_count_lock);
2333                 log_err("http2 submit response error: malloc failure");
2334                 return 0;
2335         }
2336
2337         headers[0].name = (uint8_t*)":status";
2338         headers[0].namelen = 7;
2339         headers[0].value = (uint8_t*)status;
2340         headers[0].valuelen = 3;
2341         headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2342
2343         headers[1].name = (uint8_t*)"content-type";
2344         headers[1].namelen = 12;
2345         headers[1].value = (uint8_t*)"application/dns-message";
2346         headers[1].valuelen = 23;
2347         headers[1].flags = NGHTTP2_NV_FLAG_NONE;
2348
2349         headers[2].name = (uint8_t*)"content-length";
2350         headers[2].namelen = 14;
2351         headers[2].value = (uint8_t*)rlen_str;
2352         headers[2].valuelen = strlen(rlen_str);
2353         headers[2].flags = NGHTTP2_NV_FLAG_NONE;
2354
2355         sldns_buffer_write(h2_stream->rbuffer,
2356                 sldns_buffer_current(h2_session->c->buffer),
2357                 sldns_buffer_remaining(h2_session->c->buffer));
2358         sldns_buffer_flip(h2_stream->rbuffer);
2359
2360         data_prd.source.ptr = h2_session;
2361         data_prd.read_callback = http2_submit_response_read_callback;
2362         ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2363                 headers, 3, &data_prd);
2364         if(ret) {
2365                 verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2366                         "error: %s", nghttp2_strerror(ret));
2367                 return 0;
2368         }
2369         return 1;
2370 }
2371 #else
/** Variant for builds where HAVE_NGHTTP2 is not defined: there is nothing to
 * submit a response to, so this always returns 0 (error). */
int http2_submit_dns_response(void* ATTR_UNUSED(v))
{
	/* no nghttp2 support compiled in */
	return 0;
}
2376 #endif
2377
2378 #ifdef HAVE_NGHTTP2
2379 /** HTTP status to descriptive string */
2380 static char* http_status_to_str(enum http_status s)
2381 {
2382         switch(s) {
2383                 case HTTP_STATUS_OK:
2384                         return "OK";
2385                 case HTTP_STATUS_BAD_REQUEST:
2386                         return "Bad Request";
2387                 case HTTP_STATUS_NOT_FOUND:
2388                         return "Not Found";
2389                 case HTTP_STATUS_PAYLOAD_TOO_LARGE:
2390                         return "Payload Too Large";
2391                 case HTTP_STATUS_URI_TOO_LONG:
2392                         return "URI Too Long";
2393                 case HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE:
2394                         return "Unsupported Media Type";
2395                 case HTTP_STATUS_NOT_IMPLEMENTED:
2396                         return "Not Implemented";
2397         }
2398         return "Status Unknown";
2399 }
2400
2401 /** nghttp2 callback. Used to copy error message to nghttp2 session */
2402 static ssize_t http2_submit_error_read_callback(
2403         nghttp2_session* ATTR_UNUSED(session),
2404         int32_t stream_id, uint8_t* buf, size_t length, uint32_t* data_flags,
2405         nghttp2_data_source* source, void* ATTR_UNUSED(cb_arg))
2406 {
2407         struct http2_stream* h2_stream;
2408         struct http2_session* h2_session = source->ptr;
2409         char* msg;
2410         if(!(h2_stream = nghttp2_session_get_stream_user_data(
2411                 h2_session->session, stream_id))) {
2412                 verbose(VERB_QUERY, "http2: cannot get stream data, closing "
2413                         "stream");
2414                 return NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE;
2415         }
2416         *data_flags |= NGHTTP2_DATA_FLAG_EOF;
2417         msg = http_status_to_str(h2_stream->status);
2418         if(length < strlen(msg))
2419                 return 0; /* not worth trying over multiple frames */
2420         memcpy(buf, msg, strlen(msg));
2421         return strlen(msg);
2422
2423 }
2424
2425 /**
2426  * HTTP error response ready to be submitted to nghttp2, to be prepared for
2427  * sending out. Message body will contain descriptive string for HTTP status.
2428  * @param h2_session: http2 session to submit to
2429  * @param h2_stream: http2 stream containing HTTP status to use for error
2430  * @return 0 on error, 1 otherwise
2431  */
2432 static int http2_submit_error(struct http2_session* h2_session,
2433         struct http2_stream* h2_stream)
2434 {
2435         int ret;
2436         char status[4];
2437         nghttp2_data_provider data_prd;
2438         nghttp2_nv headers[1]; /* will be copied by nghttp */
2439         if(snprintf(status, 4, "%d", h2_stream->status) != 3) {
2440                 verbose(VERB_QUERY, "http2: submit error failed, "
2441                         "invalid status");
2442                 return 0;
2443         }
2444         headers[0].name = (uint8_t*)":status";
2445         headers[0].namelen = 7;
2446         headers[0].value = (uint8_t*)status;
2447         headers[0].valuelen = 3;
2448         headers[0].flags = NGHTTP2_NV_FLAG_NONE;
2449
2450         data_prd.source.ptr = h2_session;
2451         data_prd.read_callback = http2_submit_error_read_callback;
2452
2453         ret = nghttp2_submit_response(h2_session->session, h2_stream->stream_id,
2454                 headers, 1, &data_prd);
2455         if(ret) {
2456                 verbose(VERB_QUERY, "http2: submit error failed, "
2457                         "error: %s", nghttp2_strerror(ret));
2458                 return 0;
2459         }
2460         return 1;
2461 }
2462
2463 /**
2464  * Start query handling. Query is stored in the stream, and will be free'd here.
2465  * @param h2_session: http2 session, containing comm point
2466  * @param h2_stream: stream containing buffered query
2467  * @return: -1 on error, 1 if answer is stored in c->buffer, 0 if there is no
2468  * reply available (yet).
2469  */
2470 static int http2_query_read_done(struct http2_session* h2_session,
2471         struct http2_stream* h2_stream)
2472 {
2473         log_assert(h2_stream->qbuffer);
2474
2475         if(h2_session->c->h2_stream) {
2476                 verbose(VERB_ALGO, "http2_query_read_done failure: shared "
2477                         "buffer already assigned to stream");
2478                 return -1;
2479         }
2480     
2481     /* the c->buffer might be used by mesh_send_reply and no be cleard
2482          * need to be cleared before use */
2483         sldns_buffer_clear(h2_session->c->buffer);
2484         if(sldns_buffer_remaining(h2_session->c->buffer) <
2485                 sldns_buffer_remaining(h2_stream->qbuffer)) {
2486                 /* qbuffer will be free'd in frame close cb */
2487                 sldns_buffer_clear(h2_session->c->buffer);
2488                 verbose(VERB_ALGO, "http2_query_read_done failure: can't fit "
2489                         "qbuffer in c->buffer");
2490                 return -1;
2491         }
2492
2493         sldns_buffer_write(h2_session->c->buffer,
2494                 sldns_buffer_current(h2_stream->qbuffer),
2495                 sldns_buffer_remaining(h2_stream->qbuffer));
2496
2497         lock_basic_lock(&http2_query_buffer_count_lock);
2498         http2_query_buffer_count -= sldns_buffer_capacity(h2_stream->qbuffer);
2499         lock_basic_unlock(&http2_query_buffer_count_lock);
2500         sldns_buffer_free(h2_stream->qbuffer);
2501         h2_stream->qbuffer = NULL;
2502
2503         sldns_buffer_flip(h2_session->c->buffer);
2504         h2_session->c->h2_stream = h2_stream;
2505         fptr_ok(fptr_whitelist_comm_point(h2_session->c->callback));
2506         if((*h2_session->c->callback)(h2_session->c, h2_session->c->cb_arg,
2507                 NETEVENT_NOERROR, &h2_session->c->repinfo)) {
2508                 return 1; /* answer in c->buffer */
2509         }
2510         sldns_buffer_clear(h2_session->c->buffer);
2511         h2_session->c->h2_stream = NULL;
2512         return 0; /* mesh state added, or dropped */
2513 }
2514
2515 /** nghttp2 callback. Used to check if the received frame indicates the end of a
2516  * stream. Gather collected request data and start query handling. */
2517 static int http2_req_frame_recv_cb(nghttp2_session* session,
2518         const nghttp2_frame* frame, void* cb_arg)
2519 {
2520         struct http2_session* h2_session = (struct http2_session*)cb_arg;
2521         struct http2_stream* h2_stream;
2522         int query_read_done;
2523
2524         if((frame->hd.type != NGHTTP2_DATA &&
2525                 frame->hd.type != NGHTTP2_HEADERS) ||
2526                 !(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) {
2527                         return 0;
2528         }
2529
2530         if(!(h2_stream = nghttp2_session_get_stream_user_data(
2531                 session, frame->hd.stream_id)))
2532                 return 0;
2533
2534         if(h2_stream->invalid_endpoint) {
2535                 h2_stream->status = HTTP_STATUS_NOT_FOUND;
2536                 goto submit_http_error;
2537         }
2538
2539         if(h2_stream->invalid_content_type) {
2540                 h2_stream->status = HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE;
2541                 goto submit_http_error;
2542         }
2543
2544         if(h2_stream->http_method != HTTP_METHOD_GET &&
2545                 h2_stream->http_method != HTTP_METHOD_POST) {
2546                 h2_stream->status = HTTP_STATUS_NOT_IMPLEMENTED;
2547                 goto submit_http_error;
2548         }
2549
2550         if(h2_stream->query_too_large) {
2551                 if(h2_stream->http_method == HTTP_METHOD_POST)
2552                         h2_stream->status = HTTP_STATUS_PAYLOAD_TOO_LARGE;
2553                 else
2554                         h2_stream->status = HTTP_STATUS_URI_TOO_LONG;
2555                 goto submit_http_error;
2556         }
2557
2558         if(!h2_stream->qbuffer) {
2559                 h2_stream->status = HTTP_STATUS_BAD_REQUEST;
2560                 goto submit_http_error;
2561         }
2562
2563         if(h2_stream->status) {
2564 submit_http_error:
2565                 verbose(VERB_QUERY, "http2 request invalid, returning :status="
2566                         "%d", h2_stream->status);
2567                 if(!http2_submit_error(h2_session, h2_stream)) {
2568                         return NGHTTP2_ERR_CALLBACK_FAILURE;
2569                 }
2570                 return 0;
2571         }
2572         h2_stream->status = HTTP_STATUS_OK;
2573
2574         sldns_buffer_flip(h2_stream->qbuffer);
2575         h2_session->postpone_drop = 1;
2576         query_read_done = http2_query_read_done(h2_session, h2_stream);
2577         if(query_read_done < 0)
2578                 return NGHTTP2_ERR_CALLBACK_FAILURE;
2579         else if(!query_read_done) {
2580                 if(h2_session->is_drop) {
2581                         /* connection needs to be closed. Return failure to make
2582                          * sure no other action are taken anymore on comm point.
2583                          * failure will result in reclaiming (and closing)
2584                          * of comm point. */
2585                         verbose(VERB_QUERY, "http2 query dropped in worker cb");
2586                         h2_session->postpone_drop = 0;
2587                         return NGHTTP2_ERR_CALLBACK_FAILURE;
2588                 }
2589                 /* nothing to submit right now, query added to mesh. */
2590                 h2_session->postpone_drop = 0;
2591                 return 0;
2592         }
2593         if(!http2_submit_dns_response(h2_session)) {
2594                 sldns_buffer_clear(h2_session->c->buffer);
2595                 h2_session->c->h2_stream = NULL;
2596                 return NGHTTP2_ERR_CALLBACK_FAILURE;
2597         }
2598         verbose(VERB_QUERY, "http2 query submitted to session");
2599         sldns_buffer_clear(h2_session->c->buffer);
2600         h2_session->c->h2_stream = NULL;
2601         return 0;
2602 }
2603
2604 /** nghttp2 callback. Used to detect start of new streams. */
2605 static int http2_req_begin_headers_cb(nghttp2_session* session,
2606         const nghttp2_frame* frame, void* cb_arg)
2607 {
2608         struct http2_session* h2_session = (struct http2_session*)cb_arg;
2609         struct http2_stream* h2_stream;
2610         int ret;
2611         if(frame->hd.type != NGHTTP2_HEADERS ||
2612                 frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2613                 /* only interrested in request headers */
2614                 return 0;
2615         }
2616         if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) {
2617                 log_err("malloc failure while creating http2 stream");
2618                 return NGHTTP2_ERR_CALLBACK_FAILURE;
2619         }
2620         http2_session_add_stream(h2_session, h2_stream);
2621         ret = nghttp2_session_set_stream_user_data(session,
2622                 frame->hd.stream_id, h2_stream);
2623         if(ret) {
2624                 /* stream does not exist */
2625                 verbose(VERB_QUERY, "http2: set_stream_user_data failed, "
2626                         "error: %s", nghttp2_strerror(ret));
2627                 return NGHTTP2_ERR_CALLBACK_FAILURE;
2628         }
2629
2630         return 0;
2631 }
2632
2633 /**
2634  * base64url decode, store in qbuffer
2635  * @param h2_session: http2 session
2636  * @param h2_stream: http2 stream
2637  * @param start: start of the base64 string
2638  * @param length: length of the base64 string
2639  * @return: 0 on error, 1 otherwise. query will be stored in h2_stream->qbuffer,
2640  * buffer will be NULL is unparseble.
2641  */
2642 static int http2_buffer_uri_query(struct http2_session* h2_session,
2643         struct http2_stream* h2_stream, const uint8_t* start, size_t length)
2644 {
2645         size_t expectb64len;
2646         int b64len;
2647         if(h2_stream->http_method == HTTP_METHOD_POST)
2648                 return 1;
2649         if(length == 0)
2650                 return 1;
2651         if(h2_stream->qbuffer) {
2652                 verbose(VERB_ALGO, "http2_req_header fail, "
2653                         "qbuffer already set");
2654                 return 0;
2655         }
2656
2657         /* calculate size, might be a bit bigger than the real
2658          * decoded buffer size */
2659         expectb64len = sldns_b64_pton_calculate_size(length);
2660         log_assert(expectb64len > 0);
2661         if(expectb64len >
2662                 h2_session->c->http2_stream_max_qbuffer_size) {
2663                 h2_stream->query_too_large = 1;
2664                 return 1;
2665         }
2666
2667         lock_basic_lock(&http2_query_buffer_count_lock);
2668         if(http2_query_buffer_count + expectb64len > http2_query_buffer_max) {
2669                 lock_basic_unlock(&http2_query_buffer_count_lock);
2670                 verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2671                         "in http2-query-buffer-size");
2672                 return http2_submit_rst_stream(h2_session, h2_stream);
2673         }
2674         http2_query_buffer_count += expectb64len;
2675         lock_basic_unlock(&http2_query_buffer_count_lock);
2676         if(!(h2_stream->qbuffer = sldns_buffer_new(expectb64len))) {
2677                 lock_basic_lock(&http2_query_buffer_count_lock);
2678                 http2_query_buffer_count -= expectb64len;
2679                 lock_basic_unlock(&http2_query_buffer_count_lock);
2680                 log_err("http2_req_header fail, qbuffer "
2681                         "malloc failure");
2682                 return 0;
2683         }
2684
2685         if(sldns_b64_contains_nonurl((char const*)start, length)) {
2686                 char buf[65536+4];
2687                 verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding");
2688                 /* copy to the scratch buffer temporarily to terminate the
2689                  * string with a zero */
2690                 if(length+1 > sizeof(buf)) {
2691                         /* too long */
2692                         lock_basic_lock(&http2_query_buffer_count_lock);
2693                         http2_query_buffer_count -= expectb64len;
2694                         lock_basic_unlock(&http2_query_buffer_count_lock);
2695                         sldns_buffer_free(h2_stream->qbuffer);
2696                         h2_stream->qbuffer = NULL;
2697                         return 1;
2698                 }
2699                 memmove(buf, start, length);
2700                 buf[length] = 0;
2701                 if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current(
2702                         h2_stream->qbuffer), expectb64len)) || b64len < 0) {
2703                         lock_basic_lock(&http2_query_buffer_count_lock);
2704                         http2_query_buffer_count -= expectb64len;
2705                         lock_basic_unlock(&http2_query_buffer_count_lock);
2706                         sldns_buffer_free(h2_stream->qbuffer);
2707                         h2_stream->qbuffer = NULL;
2708                         return 1;
2709                 }
2710         } else {
2711                 if(!(b64len = sldns_b64url_pton(
2712                         (char const *)start, length,
2713                         sldns_buffer_current(h2_stream->qbuffer),
2714                         expectb64len)) || b64len < 0) {
2715                         lock_basic_lock(&http2_query_buffer_count_lock);
2716                         http2_query_buffer_count -= expectb64len;
2717                         lock_basic_unlock(&http2_query_buffer_count_lock);
2718                         sldns_buffer_free(h2_stream->qbuffer);
2719                         h2_stream->qbuffer = NULL;
2720                         /* return without error, method can be an
2721                          * unknown POST */
2722                         return 1;
2723                 }
2724         }
2725         sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len);
2726         return 1;
2727 }
2728
2729 /** nghttp2 callback. Used to parse headers from HEADER frames. */
2730 static int http2_req_header_cb(nghttp2_session* session,
2731         const nghttp2_frame* frame, const uint8_t* name, size_t namelen,
2732         const uint8_t* value, size_t valuelen, uint8_t ATTR_UNUSED(flags),
2733         void* cb_arg)
2734 {
2735         struct http2_stream* h2_stream = NULL;
2736         struct http2_session* h2_session = (struct http2_session*)cb_arg;
2737         /* nghttp2 deals with CONTINUATION frames and provides them as part of
2738          * the HEADER */
2739         if(frame->hd.type != NGHTTP2_HEADERS ||
2740                 frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
2741                 /* only interrested in request headers */
2742                 return 0;
2743         }
2744         if(!(h2_stream = nghttp2_session_get_stream_user_data(session,
2745                 frame->hd.stream_id)))
2746                 return 0;
2747
2748         /* earlier checks already indicate we can stop handling this query */
2749         if(h2_stream->http_method == HTTP_METHOD_UNSUPPORTED ||
2750                 h2_stream->invalid_content_type ||
2751                 h2_stream->invalid_endpoint)
2752                 return 0;
2753
2754
2755         /* nghttp2 performs some sanity checks in the headers, including:
2756          * name and value are guaranteed to be null terminated
2757          * name is guaranteed to be lowercase
2758          * content-length value is guaranteed to contain digits
2759          */
2760
2761         if(!h2_stream->http_method && namelen == 7 &&
2762                 memcmp(":method", name, namelen) == 0) {
2763                 /* Case insensitive check on :method value to be on the safe
2764                  * side. I failed to find text about case sensitivity in specs.
2765                  */
2766                 if(valuelen == 3 && strcasecmp("GET", (const char*)value) == 0)
2767                         h2_stream->http_method = HTTP_METHOD_GET;
2768                 else if(valuelen == 4 &&
2769                         strcasecmp("POST", (const char*)value) == 0) {
2770                         h2_stream->http_method = HTTP_METHOD_POST;
2771                         if(h2_stream->qbuffer) {
2772                                 /* POST method uses query from DATA frames */
2773                                 lock_basic_lock(&http2_query_buffer_count_lock);
2774                                 http2_query_buffer_count -=
2775                                         sldns_buffer_capacity(h2_stream->qbuffer);
2776                                 lock_basic_unlock(&http2_query_buffer_count_lock);
2777                                 sldns_buffer_free(h2_stream->qbuffer);
2778                                 h2_stream->qbuffer = NULL;
2779                         }
2780                 } else
2781                         h2_stream->http_method = HTTP_METHOD_UNSUPPORTED;
2782                 return 0;
2783         }
2784         if(namelen == 5 && memcmp(":path", name, namelen) == 0) {
2785                 /* :path may contain DNS query, depending on method. Method might
2786                  * not be known yet here, so check after finishing receiving
2787                  * stream. */
2788 #define HTTP_QUERY_PARAM "?dns="
2789                 size_t el = strlen(h2_session->c->http_endpoint);
2790                 size_t qpl = strlen(HTTP_QUERY_PARAM);
2791
2792                 if(valuelen < el || memcmp(h2_session->c->http_endpoint,
2793                         value, el) != 0) {
2794                         h2_stream->invalid_endpoint = 1;
2795                         return 0;
2796                 }
2797                 /* larger than endpoint only allowed if it is for the query
2798                  * parameter */
2799                 if(valuelen <= el+qpl ||
2800                         memcmp(HTTP_QUERY_PARAM, value+el, qpl) != 0) {
2801                         if(valuelen != el)
2802                                 h2_stream->invalid_endpoint = 1;
2803                         return 0;
2804                 }
2805
2806                 if(!http2_buffer_uri_query(h2_session, h2_stream,
2807                         value+(el+qpl), valuelen-(el+qpl))) {
2808                         return NGHTTP2_ERR_CALLBACK_FAILURE;
2809                 }
2810                 return 0;
2811         }
2812         /* Content type is a SHOULD (rfc7231#section-3.1.1.5) when using POST,
2813          * and not needed when using GET. Don't enfore.
2814          * If set only allow lowercase "application/dns-message".
2815          *
2816          * Clients SHOULD (rfc8484#section-4.1) set an accept header, but MUST
2817          * be able to handle "application/dns-message". Since that is the only
2818          * content-type supported we can ignore the accept header.
2819          */
2820         if((namelen == 12 && memcmp("content-type", name, namelen) == 0)) {
2821                 if(valuelen != 23 || memcmp("application/dns-message", value,
2822                         valuelen) != 0) {
2823                         h2_stream->invalid_content_type = 1;
2824                 }
2825         }
2826
2827         /* Only interested in content-lentg for POST (on not yet known) method.
2828          */
2829         if((!h2_stream->http_method ||
2830                 h2_stream->http_method == HTTP_METHOD_POST) &&
2831                 !h2_stream->content_length && namelen  == 14 &&
2832                 memcmp("content-length", name, namelen) == 0) {
2833                 if(valuelen > 5) {
2834                         h2_stream->query_too_large = 1;
2835                         return 0;
2836                 }
2837                 /* guaranteed to only contian digits and be null terminated */
2838                 h2_stream->content_length = atoi((const char*)value);
2839                 if(h2_stream->content_length >
2840                         h2_session->c->http2_stream_max_qbuffer_size) {
2841                         h2_stream->query_too_large = 1;
2842                         return 0;
2843                 }
2844         }
2845         return 0;
2846 }
2847
2848 /** nghttp2 callback. Used to get data from DATA frames, which can contain
2849  * queries in POST requests. */
2850 static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session),
2851         uint8_t ATTR_UNUSED(flags), int32_t stream_id, const uint8_t* data,
2852         size_t len, void* cb_arg)
2853 {
2854         struct http2_session* h2_session = (struct http2_session*)cb_arg;
2855         struct http2_stream* h2_stream;
2856         size_t qlen = 0;
2857
2858         if(!(h2_stream = nghttp2_session_get_stream_user_data(
2859                 h2_session->session, stream_id))) {
2860                 return 0;
2861         }
2862
2863         if(h2_stream->query_too_large)
2864                 return 0;
2865
2866         if(!h2_stream->qbuffer) {
2867                 if(h2_stream->content_length) {
2868                         if(h2_stream->content_length < len)
2869                                 /* getting more data in DATA frame than
2870                                  * advertised in content-length header. */
2871                                 return NGHTTP2_ERR_CALLBACK_FAILURE;
2872                         qlen = h2_stream->content_length;
2873                 } else if(len <= h2_session->c->http2_stream_max_qbuffer_size) {
2874                         /* setting this to msg-buffer-size can result in a lot
2875                          * of memory consuption. Most queries should fit in a
2876                          * single DATA frame, and most POST queries will
2877                          * containt content-length which does not impose this
2878                          * limit. */
2879                         qlen = len;
2880                 }
2881         }
2882         if(!h2_stream->qbuffer && qlen) {
2883                 lock_basic_lock(&http2_query_buffer_count_lock);
2884                 if(http2_query_buffer_count + qlen > http2_query_buffer_max) {
2885                         lock_basic_unlock(&http2_query_buffer_count_lock);
2886                         verbose(VERB_ALGO, "reset HTTP2 stream, no space left, "
2887                                 "in http2-query-buffer-size");
2888                         return http2_submit_rst_stream(h2_session, h2_stream);
2889                 }
2890                 http2_query_buffer_count += qlen;
2891                 lock_basic_unlock(&http2_query_buffer_count_lock);
2892                 if(!(h2_stream->qbuffer = sldns_buffer_new(qlen))) {
2893                         lock_basic_lock(&http2_query_buffer_count_lock);
2894                         http2_query_buffer_count -= qlen;
2895                         lock_basic_unlock(&http2_query_buffer_count_lock);
2896                 }
2897         }
2898
2899         if(!h2_stream->qbuffer ||
2900                 sldns_buffer_remaining(h2_stream->qbuffer) < len) {
2901                 verbose(VERB_ALGO, "http2 data_chunck_recv failed. Not enough "
2902                         "buffer space for POST query. Can happen on multi "
2903                         "frame requests without content-length header");
2904                 h2_stream->query_too_large = 1;
2905                 return 0;
2906         }
2907
2908         sldns_buffer_write(h2_stream->qbuffer, data, len);
2909
2910         return 0;
2911 }
2912
2913 void http2_req_stream_clear(struct http2_stream* h2_stream)
2914 {
2915         if(h2_stream->qbuffer) {
2916                 lock_basic_lock(&http2_query_buffer_count_lock);
2917                 http2_query_buffer_count -=
2918                         sldns_buffer_capacity(h2_stream->qbuffer);
2919                 lock_basic_unlock(&http2_query_buffer_count_lock);
2920                 sldns_buffer_free(h2_stream->qbuffer);
2921                 h2_stream->qbuffer = NULL;
2922         }
2923         if(h2_stream->rbuffer) {
2924                 lock_basic_lock(&http2_response_buffer_count_lock);
2925                 http2_response_buffer_count -=
2926                         sldns_buffer_capacity(h2_stream->rbuffer);
2927                 lock_basic_unlock(&http2_response_buffer_count_lock);
2928                 sldns_buffer_free(h2_stream->rbuffer);
2929                 h2_stream->rbuffer = NULL;
2930         }
2931 }
2932
2933 nghttp2_session_callbacks* http2_req_callbacks_create(void)
2934 {
2935         nghttp2_session_callbacks *callbacks;
2936         if(nghttp2_session_callbacks_new(&callbacks) == NGHTTP2_ERR_NOMEM) {
2937                 log_err("failed to initialize nghttp2 callback");
2938                 return NULL;
2939         }
2940         /* reception of header block started, used to create h2_stream */
2941         nghttp2_session_callbacks_set_on_begin_headers_callback(callbacks,
2942                 http2_req_begin_headers_cb);
2943         /* complete frame received, used to get data from stream if frame
2944          * has end stream flag, and start processing query */
2945         nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks,
2946                 http2_req_frame_recv_cb);
2947         /* get request info from headers */
2948         nghttp2_session_callbacks_set_on_header_callback(callbacks,
2949                 http2_req_header_cb);
2950         /* get data from DATA frames, containing POST query */
2951         nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks,
2952                 http2_req_data_chunk_recv_cb);
2953
2954         /* generic HTTP2 callbacks */
2955         nghttp2_session_callbacks_set_recv_callback(callbacks, http2_recv_cb);
2956         nghttp2_session_callbacks_set_send_callback(callbacks, http2_send_cb);
2957         nghttp2_session_callbacks_set_on_stream_close_callback(callbacks,
2958                 http2_stream_close_cb);
2959
2960         return callbacks;
2961 }
2962 #endif /* HAVE_NGHTTP2 */