]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/services/listen_dnsport.c
Fix multiple vulnerabilities in unbound.
[FreeBSD/FreeBSD.git] / contrib / unbound / services / listen_dnsport.c
1 /*
2  * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file has functions to get queries from clients.
40  */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 #  include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #ifdef USE_TCP_FASTOPEN
47 #include <netinet/tcp.h>
48 #endif
49 #include "services/listen_dnsport.h"
50 #include "services/outside_network.h"
51 #include "util/netevent.h"
52 #include "util/log.h"
53 #include "util/config_file.h"
54 #include "util/net_help.h"
55 #include "sldns/sbuffer.h"
56 #include "services/mesh.h"
57 #include "util/fptr_wlist.h"
58 #include "util/locks.h"
59
60 #ifdef HAVE_NETDB_H
61 #include <netdb.h>
62 #endif
63 #include <fcntl.h>
64
65 #ifdef HAVE_SYS_UN_H
66 #include <sys/un.h>
67 #endif
68
69 #ifdef HAVE_SYSTEMD
70 #include <systemd/sd-daemon.h>
71 #endif
72
73 /** number of queued TCP connections for listen() (the backlog value) */
74 #define TCP_BACKLOG 256 
75
76 /** number of simultaneous requests a client can have */
77 #define TCP_MAX_REQ_SIMULTANEOUS 32
78
79 #ifndef THREADS_DISABLED
80 /** lock on the counter of stream buffer memory (stream_wait_count);
 * only declared when threads are not disabled */
81 static lock_basic_type stream_wait_count_lock;
82 #endif
83 /** size (in bytes) of stream wait buffers; starts at zero */
84 static size_t stream_wait_count = 0;
85 /** flag: is the lock initialised for stream wait buffers; starts at
 * zero (not initialised) */
86 static int stream_wait_lock_inited = 0;
87
88 /**
89  * Debug print of the getaddrinfo returned address.
90  * @param addr: the address returned.
91  */
92 static void
93 verbose_print_addr(struct addrinfo *addr)
94 {
95         if(verbosity >= VERB_ALGO) {
96                 char buf[100];
97                 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
98 #ifdef INET6
99                 if(addr->ai_family == AF_INET6)
100                         sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
101                                 sin6_addr;
102 #endif /* INET6 */
103                 if(inet_ntop(addr->ai_family, sinaddr, buf,
104                         (socklen_t)sizeof(buf)) == 0) {
105                         (void)strlcpy(buf, "(null)", sizeof(buf));
106                 }
107                 buf[sizeof(buf)-1] = 0;
108                 verbose(VERB_ALGO, "creating %s%s socket %s %d", 
109                         addr->ai_socktype==SOCK_DGRAM?"udp":
110                         addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
111                         addr->ai_family==AF_INET?"4":
112                         addr->ai_family==AF_INET6?"6":
113                         "_otherfam", buf, 
114                         ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
115         }
116 }
117
118 #ifdef HAVE_SYSTEMD
119 static int
120 systemd_get_activated(int family, int socktype, int listen,
121                       struct sockaddr *addr, socklen_t addrlen,
122                       const char *path)
123 {
124         int i = 0;
125         int r = 0;
126         int s = -1;
127         const char* listen_pid, *listen_fds;
128
129         /* We should use "listen" option only for stream protocols. For UDP it should be -1 */
130
131         if((r = sd_booted()) < 1) {
132                 if(r == 0)
133                         log_warn("systemd is not running");
134                 else
135                         log_err("systemd sd_booted(): %s", strerror(-r));
136                 return -1;
137         }
138
139         listen_pid = getenv("LISTEN_PID");
140         listen_fds = getenv("LISTEN_FDS");
141
142         if (!listen_pid) {
143                 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_PID");
144                 return -1;
145         }
146
147         if (!listen_fds) {
148                 log_warn("Systemd mandatory ENV variable is not defined: LISTEN_FDS");
149                 return -1;
150         }
151
152         if((r = sd_listen_fds(0)) < 1) {
153                 if(r == 0)
154                         log_warn("systemd: did not return socket, check unit configuration");
155                 else
156                         log_err("systemd sd_listen_fds(): %s", strerror(-r));
157                 return -1;
158         }
159         
160         for(i = 0; i < r; i++) {
161                 if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) {
162                         s = SD_LISTEN_FDS_START + i;
163                         break;
164                 }
165         }
166         if (s == -1) {
167                 if (addr)
168                         log_err_addr("systemd sd_listen_fds()",
169                                      "no such socket",
170                                      (struct sockaddr_storage *)addr, addrlen);
171                 else
172                         log_err("systemd sd_listen_fds(): %s", path);
173         }
174         return s;
175 }
176 #endif
177
/**
 * Create a datagram socket (or take one over from systemd), apply the
 * socket options below, bind it and make it nonblocking.
 *
 * On every failure path the descriptor is closed, -1 is returned and the
 * inuse and noproto outputs are written.  On success they are not written.
 *
 * @param family: address family passed to socket(); AF_INET6 and AF_INET
 *      each get their own MTU related options.
 * @param socktype: socket type passed to socket().
 * @param addr: address to bind to.
 * @param addrlen: length of addr.
 * @param v6only: for AF_INET6 (when IPV6_V6ONLY exists): 0 leaves the
 *      option alone, 2 sets IPV6_V6ONLY to 0, any other value sets it to 1.
 * @param inuse: output; on non-winsock builds set to true when bind failed
 *      with EADDRINUSE, otherwise set to 0 on failure.
 * @param noproto: output; set to 1 when socket() failed because the
 *      family or protocol is not supported, or when bind on AF_INET6
 *      failed with EINVAL; set to 0 on the other failure paths.
 * @param rcv: if nonzero, the receive buffer size to request.
 * @param snd: if nonzero, the send buffer size to request.
 * @param listen: if nonzero, the SO_REUSEADDR, reuseport and transparent
 *      options are applied; it also makes bind errors EACCES and
 *      EADDRNOTAVAIL get logged at low verbosity.
 * @param reuseport: in/out, may be NULL; if it points to a nonzero value
 *      (and listen is set) SO_REUSEPORT_LB or SO_REUSEPORT is tried, and
 *      the value is reset to 0 when that setsockopt fails.
 * @param transparent: if nonzero (and listen is set), try IP_TRANSPARENT,
 *      IP_BINDANY or SO_BINDANY, whichever is compiled in; failure only
 *      logs a warning.
 * @param freebind: if nonzero, try IP_FREEBIND; failure only logs a warning.
 * @param use_systemd: if nonzero and HAVE_SYSTEMD is defined, first try to
 *      obtain the descriptor from systemd_get_activated().
 * @return the socket descriptor, or -1 on failure.
 */
178 int
179 create_udp_sock(int family, int socktype, struct sockaddr* addr,
180         socklen_t addrlen, int v6only, int* inuse, int* noproto,
181         int rcv, int snd, int listen, int* reuseport, int transparent,
182         int freebind, int use_systemd)
183 {
184         int s;
185 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)  || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined (SO_BINDANY)
186         int on=1;
187 #endif
188 #ifdef IPV6_MTU
189         int mtu = IPV6_MIN_MTU;
190 #endif
            /* silence unused-parameter warnings for options that this
             * platform does not have */
191 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
192         (void)rcv;
193 #endif
194 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
195         (void)snd;
196 #endif
197 #ifndef IPV6_V6ONLY
198         (void)v6only;
199 #endif
200 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
201         (void)transparent;
202 #endif
203 #if !defined(IP_FREEBIND)
204         (void)freebind;
205 #endif
206 #ifdef HAVE_SYSTEMD
207         int got_fd_from_systemd = 0;
208
            /* With HAVE_SYSTEMD this if opens a block around the socket()
             * call below; it is closed by the "} else {" under the next
             * HAVE_SYSTEMD section.  A new socket is only created when
             * systemd is not used or did not hand over a descriptor. */
209         if (!use_systemd
210             || (use_systemd
211                 && (s = systemd_get_activated(family, socktype, -1, addr,
212                                               addrlen, NULL)) == -1)) {
213 #else
214         (void)use_systemd;
215 #endif
216         if((s = socket(family, socktype, 0)) == -1) {
217                 *inuse = 0;
218 #ifndef USE_WINSOCK
219                 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
220                         *noproto = 1;
221                         return -1;
222                 }
223                 log_err("can't create socket: %s", strerror(errno));
224 #else
225                 if(WSAGetLastError() == WSAEAFNOSUPPORT || 
226                         WSAGetLastError() == WSAEPROTONOSUPPORT) {
227                         *noproto = 1;
228                         return -1;
229                 }
230                 log_err("can't create socket: %s", 
231                         wsa_strerror(WSAGetLastError()));
232 #endif
233                 *noproto = 0;
234                 return -1;
235         }
236 #ifdef HAVE_SYSTEMD
237         } else {
238                 got_fd_from_systemd = 1;
239         }
240 #endif
            /* the options in this block are only applied when listen is set */
241         if(listen) {
242 #ifdef SO_REUSEADDR
243                 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 
244                         (socklen_t)sizeof(on)) < 0) {
245 #ifndef USE_WINSOCK
246                         log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
247                                 strerror(errno));
                            /* ENOSYS is logged but not treated as fatal */
248                         if(errno != ENOSYS) {
249                                 close(s);
250                                 *noproto = 0;
251                                 *inuse = 0;
252                                 return -1;
253                         }
254 #else
255                         log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
256                                 wsa_strerror(WSAGetLastError()));
257                         closesocket(s);
258                         *noproto = 0;
259                         *inuse = 0;
260                         return -1;
261 #endif
262                 }
263 #endif /* SO_REUSEADDR */
264 #ifdef SO_REUSEPORT
265 #  ifdef SO_REUSEPORT_LB
266                 /* on FreeBSD 12 we have SO_REUSEPORT_LB that does loadbalance
267                  * like SO_REUSEPORT on Linux.  This is what the users want
268                  * with the config option in unbound.conf; if we actually
269                  * need local address and port reuse they'll also need to
270                  * have SO_REUSEPORT set for them, assume it was _LB they want.
271                  */
272                 if (reuseport && *reuseport &&
273                     setsockopt(s, SOL_SOCKET, SO_REUSEPORT_LB, (void*)&on,
274                         (socklen_t)sizeof(on)) < 0) {
275 #ifdef ENOPROTOOPT
276                         if(errno != ENOPROTOOPT || verbosity >= 3)
277                                 log_warn("setsockopt(.. SO_REUSEPORT_LB ..) failed: %s",
278                                         strerror(errno));
279 #endif
280                         /* this option is not essential, we can continue */
281                         *reuseport = 0;
282                 }
283 #  else /* no SO_REUSEPORT_LB */
284
285                 /* try to set SO_REUSEPORT so that incoming
286                  * queries are distributed evenly among the receiving threads.
287                  * Each thread must have its own socket bound to the same port,
288                  * with SO_REUSEPORT set on each socket.
289                  */
290                 if (reuseport && *reuseport &&
291                     setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
292                         (socklen_t)sizeof(on)) < 0) {
293 #ifdef ENOPROTOOPT
294                         if(errno != ENOPROTOOPT || verbosity >= 3)
295                                 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
296                                         strerror(errno));
297 #endif
298                         /* this option is not essential, we can continue */
299                         *reuseport = 0;
300                 }
301 #  endif /* SO_REUSEPORT_LB */
302 #else
303                 (void)reuseport;
304 #endif /* defined(SO_REUSEPORT) */
                    /* transparent: use whichever of the three options is
                     * compiled in; a failure is only warned about */
305 #ifdef IP_TRANSPARENT
306                 if (transparent &&
307                     setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
308                     (socklen_t)sizeof(on)) < 0) {
309                         log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
310                         strerror(errno));
311                 }
312 #elif defined(IP_BINDANY)
313                 if (transparent &&
314                     setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
315                     (family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
316                     (void*)&on, (socklen_t)sizeof(on)) < 0) {
317                         log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
318                         (family==AF_INET6?"V6":""), strerror(errno));
319                 }
320 #elif defined(SO_BINDANY)
321                 if (transparent &&
322                     setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on,
323                     (socklen_t)sizeof(on)) < 0) {
324                         log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
325                         strerror(errno));
326                 }
327 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
328         }
            /* freebind is applied whether or not listen is set; a failure
             * is only warned about */
329 #ifdef IP_FREEBIND
330         if(freebind &&
331             setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
332             (socklen_t)sizeof(on)) < 0) {
333                 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
334                 strerror(errno));
335         }
336 #endif /* IP_FREEBIND */
337         if(rcv) {
338 #ifdef SO_RCVBUF
339                 int got;
340                 socklen_t slen = (socklen_t)sizeof(got);
                    /* With SO_RCVBUFFORCE the plain SO_RCVBUF request further
                     * down is only the fallback after an EPERM failure, and
                     * the extra closing brace sits under the matching #ifdef.
                     * Without it the SO_RCVBUF request runs unconditionally. */
341 #  ifdef SO_RCVBUFFORCE
342                 /* Linux specific: try to use root permission to override
343                  * system limits on rcvbuf. The limit is stored in 
344                  * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
345                 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, 
346                         (socklen_t)sizeof(rcv)) < 0) {
347                         if(errno != EPERM) {
348 #    ifndef USE_WINSOCK
349                                 log_err("setsockopt(..., SO_RCVBUFFORCE, "
350                                         "...) failed: %s", strerror(errno));
351                                 close(s);
352 #    else
353                                 log_err("setsockopt(..., SO_RCVBUFFORCE, "
354                                         "...) failed: %s", 
355                                         wsa_strerror(WSAGetLastError()));
356                                 closesocket(s);
357 #    endif
358                                 *noproto = 0;
359                                 *inuse = 0;
360                                 return -1;
361                         }
362 #  endif /* SO_RCVBUFFORCE */
363                         if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, 
364                                 (socklen_t)sizeof(rcv)) < 0) {
365 #  ifndef USE_WINSOCK
366                                 log_err("setsockopt(..., SO_RCVBUF, "
367                                         "...) failed: %s", strerror(errno));
368                                 close(s);
369 #  else
370                                 log_err("setsockopt(..., SO_RCVBUF, "
371                                         "...) failed: %s", 
372                                         wsa_strerror(WSAGetLastError()));
373                                 closesocket(s);
374 #  endif
375                                 *noproto = 0;
376                                 *inuse = 0;
377                                 return -1;
378                         }
379                         /* check if we got the right thing or if system
380                          * reduced to some system max.  Warn if so */
381                         if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, 
382                                 &slen) >= 0 && got < rcv/2) {
383                                 log_warn("so-rcvbuf %u was not granted. "
384                                         "Got %u. To fix: start with "
385                                         "root permissions(linux) or sysctl "
386                                         "bigger net.core.rmem_max(linux) or "
387                                         "kern.ipc.maxsockbuf(bsd) values.",
388                                         (unsigned)rcv, (unsigned)got);
389                         }
390 #  ifdef SO_RCVBUFFORCE
391                 }
392 #  endif
393 #endif /* SO_RCVBUF */
394         }
395         /* first do RCVBUF as the receive buffer is more important */
396         if(snd) {
397 #ifdef SO_SNDBUF
398                 int got;
399                 socklen_t slen = (socklen_t)sizeof(got);
                    /* same structure as the receive buffer code above:
                     * SO_SNDBUF is the fallback when SO_SNDBUFFORCE exists
                     * and failed with EPERM */
400 #  ifdef SO_SNDBUFFORCE
401                 /* Linux specific: try to use root permission to override
402                  * system limits on sndbuf. The limit is stored in 
403                  * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
404                 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, 
405                         (socklen_t)sizeof(snd)) < 0) {
406                         if(errno != EPERM) {
407 #    ifndef USE_WINSOCK
408                                 log_err("setsockopt(..., SO_SNDBUFFORCE, "
409                                         "...) failed: %s", strerror(errno));
410                                 close(s);
411 #    else
412                                 log_err("setsockopt(..., SO_SNDBUFFORCE, "
413                                         "...) failed: %s", 
414                                         wsa_strerror(WSAGetLastError()));
415                                 closesocket(s);
416 #    endif
417                                 *noproto = 0;
418                                 *inuse = 0;
419                                 return -1;
420                         }
421 #  endif /* SO_SNDBUFFORCE */
422                         if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 
423                                 (socklen_t)sizeof(snd)) < 0) {
424 #  ifndef USE_WINSOCK
425                                 log_err("setsockopt(..., SO_SNDBUF, "
426                                         "...) failed: %s", strerror(errno));
427                                 close(s);
428 #  else
429                                 log_err("setsockopt(..., SO_SNDBUF, "
430                                         "...) failed: %s", 
431                                         wsa_strerror(WSAGetLastError()));
432                                 closesocket(s);
433 #  endif
434                                 *noproto = 0;
435                                 *inuse = 0;
436                                 return -1;
437                         }
438                         /* check if we got the right thing or if system
439                          * reduced to some system max.  Warn if so */
440                         if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, 
441                                 &slen) >= 0 && got < snd/2) {
442                                 log_warn("so-sndbuf %u was not granted. "
443                                         "Got %u. To fix: start with "
444                                         "root permissions(linux) or sysctl "
445                                         "bigger net.core.wmem_max(linux) or "
446                                         "kern.ipc.maxsockbuf(bsd) values.",
447                                         (unsigned)snd, (unsigned)got);
448                         }
449 #  ifdef SO_SNDBUFFORCE
450                 }
451 #  endif
452 #endif /* SO_SNDBUF */
453         }
454         if(family == AF_INET6) {
455 # if defined(IPV6_V6ONLY)
456                 if(v6only) {
                            /* v6only==2 explicitly switches the option off,
                             * any other nonzero value switches it on */
457                         int val=(v6only==2)?0:1;
458                         if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 
459                                 (void*)&val, (socklen_t)sizeof(val)) < 0) {
460 #ifndef USE_WINSOCK
461                                 log_err("setsockopt(..., IPV6_V6ONLY"
462                                         ", ...) failed: %s", strerror(errno));
463                                 close(s);
464 #else
465                                 log_err("setsockopt(..., IPV6_V6ONLY"
466                                         ", ...) failed: %s", 
467                                         wsa_strerror(WSAGetLastError()));
468                                 closesocket(s);
469 #endif
470                                 *noproto = 0;
471                                 *inuse = 0;
472                                 return -1;
473                         }
474                 }
475 # endif
476 # if defined(IPV6_USE_MIN_MTU)
477                 /*
478                  * There is no fragmentation of IPv6 datagrams
479                  * during forwarding in the network. Therefore
480                  * we do not send UDP datagrams larger than
481                  * the minimum IPv6 MTU of 1280 octets. The
482                  * EDNS0 message length can be larger if the
483                  * network stack supports IPV6_USE_MIN_MTU.
484                  */
485                 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
486                         (void*)&on, (socklen_t)sizeof(on)) < 0) {
487 #  ifndef USE_WINSOCK
488                         log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
489                                 "...) failed: %s", strerror(errno));
490                         close(s);
491 #  else
492                         log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
493                                 "...) failed: %s", 
494                                 wsa_strerror(WSAGetLastError()));
495                         closesocket(s);
496 #  endif
497                         *noproto = 0;
498                         *inuse = 0;
499                         return -1;
500                 }
501 # elif defined(IPV6_MTU)
502                 /*
503                  * On Linux, to send no larger than 1280, the PMTUD is
504                  * disabled by default for datagrams anyway, so we set
505                  * the MTU to use.
506                  */
507                 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
508                         (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
509 #  ifndef USE_WINSOCK
510                         log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 
511                                 strerror(errno));
512                         close(s);
513 #  else
514                         log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 
515                                 wsa_strerror(WSAGetLastError()));
516                         closesocket(s);
517 #  endif
518                         *noproto = 0;
519                         *inuse = 0;
520                         return -1;
521                 }
522 # endif /* IPv6 MTU */
523         } else if(family == AF_INET) {
524 #  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
525 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
526  * PMTU information is not accepted, but fragmentation is allowed
527  * if and only if the packet size exceeds the outgoing interface MTU
528  * (and also uses the interface mtu to determine the size of the packets).
529  * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
530  * FreeBSD already has same semantics without setting the option. */
531                 int omit_set = 0;
532                 int action;
533 #   if defined(IP_PMTUDISC_OMIT)
534                 action = IP_PMTUDISC_OMIT;
535                 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 
536                         &action, (socklen_t)sizeof(action)) < 0) {
537
                            /* EINVAL is tolerated here; omit_set stays 0 so
                             * that IP_PMTUDISC_DONT is tried below */
538                         if (errno != EINVAL) {
539                                 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
540                                         strerror(errno));
541
542 #    ifndef USE_WINSOCK
543                                 close(s);
544 #    else
545                                 closesocket(s);
546 #    endif
547                                 *noproto = 0;
548                                 *inuse = 0;
549                                 return -1;
550                         }
551                 }
552                 else
553                 {
554                     omit_set = 1;
555                 }
556 #   endif
                    /* fall back to IP_PMTUDISC_DONT when OMIT is not compiled
                     * in or was rejected with EINVAL */
557                 if (omit_set == 0) {
558                         action = IP_PMTUDISC_DONT;
559                         if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
560                                 &action, (socklen_t)sizeof(action)) < 0) {
561                                 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
562                                         strerror(errno));
563 #    ifndef USE_WINSOCK
564                                 close(s);
565 #    else
566                                 closesocket(s);
567 #    endif
568                                 *noproto = 0;
569                                 *inuse = 0;
570                                 return -1;
571                         }
572                 }
573 #  elif defined(IP_DONTFRAG)
                    /* set the IP_DONTFRAG option to 0 */
574                 int off = 0;
575                 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, 
576                         &off, (socklen_t)sizeof(off)) < 0) {
577                         log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
578                                 strerror(errno));
579 #    ifndef USE_WINSOCK
580                         close(s);
581 #    else
582                         closesocket(s);
583 #    endif
584                         *noproto = 0;
585                         *inuse = 0;
586                         return -1;
587                 }
588 #  endif /* IPv4 MTU */
589         }
            /* a descriptor that came from systemd is not bound here */
590         if(
591 #ifdef HAVE_SYSTEMD
592                 !got_fd_from_systemd &&
593 #endif
594                 bind(s, (struct sockaddr*)addr, addrlen) != 0) {
595                 *noproto = 0;
596                 *inuse = 0;
597 #ifndef USE_WINSOCK
598 #ifdef EADDRINUSE
599                 *inuse = (errno == EADDRINUSE);
600                 /* detect freebsd jail with no ipv6 permission */
601                 if(family==AF_INET6 && errno==EINVAL)
602                         *noproto = 1;
                    /* EADDRINUSE is never logged here; EACCES and
                     * EADDRNOTAVAIL are not logged for a non-listening
                     * socket unless verbosity is 4 or higher */
603                 else if(errno != EADDRINUSE &&
604                         !(errno == EACCES && verbosity < 4 && !listen)
605 #ifdef EADDRNOTAVAIL
606                         && !(errno == EADDRNOTAVAIL && verbosity < 4 && !listen)
607 #endif
608                         ) {
609                         log_err_addr("can't bind socket", strerror(errno),
610                                 (struct sockaddr_storage*)addr, addrlen);
611                 }
612 #endif /* EADDRINUSE */
613                 close(s);
614 #else /* USE_WINSOCK */
615                 if(WSAGetLastError() != WSAEADDRINUSE &&
616                         WSAGetLastError() != WSAEADDRNOTAVAIL &&
617                         !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) {
618                         log_err_addr("can't bind socket", 
619                                 wsa_strerror(WSAGetLastError()),
620                                 (struct sockaddr_storage*)addr, addrlen);
621                 }
622                 closesocket(s);
623 #endif /* USE_WINSOCK */
624                 return -1;
625         }
626         if(!fd_set_nonblock(s)) {
627                 *noproto = 0;
628                 *inuse = 0;
629 #ifndef USE_WINSOCK
630                 close(s);
631 #else
632                 closesocket(s);
633 #endif
634                 return -1;
635         }
636         return s;
637 }
638
639 int
640 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
641         int* reuseport, int transparent, int mss, int freebind, int use_systemd)
642 {
643         int s;
644 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT) || defined(IP_BINDANY) || defined(IP_FREEBIND) || defined(SO_BINDANY)
645         int on = 1;
646 #endif
647 #ifdef HAVE_SYSTEMD
648         int got_fd_from_systemd = 0;
649 #endif
650 #ifdef USE_TCP_FASTOPEN
651         int qlen;
652 #endif
653 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY) && !defined(SO_BINDANY)
654         (void)transparent;
655 #endif
656 #if !defined(IP_FREEBIND)
657         (void)freebind;
658 #endif
659         verbose_print_addr(addr);
660         *noproto = 0;
661 #ifdef HAVE_SYSTEMD
662         if (!use_systemd ||
663             (use_systemd
664              && (s = systemd_get_activated(addr->ai_family, addr->ai_socktype, 1,
665                                            addr->ai_addr, addr->ai_addrlen,
666                                            NULL)) == -1)) {
667 #else
668         (void)use_systemd;
669 #endif
670         if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
671 #ifndef USE_WINSOCK
672                 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
673                         *noproto = 1;
674                         return -1;
675                 }
676                 log_err("can't create socket: %s", strerror(errno));
677 #else
678                 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
679                         WSAGetLastError() == WSAEPROTONOSUPPORT) {
680                         *noproto = 1;
681                         return -1;
682                 }
683                 log_err("can't create socket: %s", 
684                         wsa_strerror(WSAGetLastError()));
685 #endif
686                 return -1;
687         }
688         if (mss > 0) {
689 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
690                 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
691                         (socklen_t)sizeof(mss)) < 0) {
692                         #ifndef USE_WINSOCK
693                         log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
694                                 strerror(errno));
695                         #else
696                         log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
697                                 wsa_strerror(WSAGetLastError()));
698                         #endif
699                 } else {
700                         verbose(VERB_ALGO,
701                                 " tcp socket mss set to %d", mss);
702                 }
703 #else
704                 log_warn(" setsockopt(TCP_MAXSEG) unsupported");
705 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
706         }
707 #ifdef HAVE_SYSTEMD
708         } else {
709                 got_fd_from_systemd = 1;
710     }
711 #endif
712 #ifdef SO_REUSEADDR
713         if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 
714                 (socklen_t)sizeof(on)) < 0) {
715 #ifndef USE_WINSOCK
716                 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
717                         strerror(errno));
718                 close(s);
719 #else
720                 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
721                         wsa_strerror(WSAGetLastError()));
722                 closesocket(s);
723 #endif
724                 return -1;
725         }
726 #endif /* SO_REUSEADDR */
727 #ifdef IP_FREEBIND
728         if (freebind && setsockopt(s, IPPROTO_IP, IP_FREEBIND, (void*)&on,
729             (socklen_t)sizeof(on)) < 0) {
730                 log_warn("setsockopt(.. IP_FREEBIND ..) failed: %s",
731                 strerror(errno));
732         }
733 #endif /* IP_FREEBIND */
734 #ifdef SO_REUSEPORT
735         /* try to set SO_REUSEPORT so that incoming
736          * connections are distributed evenly among the receiving threads.
737          * Each thread must have its own socket bound to the same port,
738          * with SO_REUSEPORT set on each socket.
739          */
740         if (reuseport && *reuseport &&
741                 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
742                 (socklen_t)sizeof(on)) < 0) {
743 #ifdef ENOPROTOOPT
744                 if(errno != ENOPROTOOPT || verbosity >= 3)
745                         log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
746                                 strerror(errno));
747 #endif
748                 /* this option is not essential, we can continue */
749                 *reuseport = 0;
750         }
751 #else
752         (void)reuseport;
753 #endif /* defined(SO_REUSEPORT) */
754 #if defined(IPV6_V6ONLY)
755         if(addr->ai_family == AF_INET6 && v6only) {
756                 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 
757                         (void*)&on, (socklen_t)sizeof(on)) < 0) {
758 #ifndef USE_WINSOCK
759                         log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
760                                 strerror(errno));
761                         close(s);
762 #else
763                         log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
764                                 wsa_strerror(WSAGetLastError()));
765                         closesocket(s);
766 #endif
767                         return -1;
768                 }
769         }
770 #else
771         (void)v6only;
772 #endif /* IPV6_V6ONLY */
773 #ifdef IP_TRANSPARENT
774         if (transparent &&
775             setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
776             (socklen_t)sizeof(on)) < 0) {
777                 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
778                         strerror(errno));
779         }
780 #elif defined(IP_BINDANY)
781         if (transparent &&
782             setsockopt(s, (addr->ai_family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
783             (addr->ai_family == AF_INET6? IPV6_BINDANY:IP_BINDANY),
784             (void*)&on, (socklen_t)sizeof(on)) < 0) {
785                 log_warn("setsockopt(.. IP%s_BINDANY ..) failed: %s",
786                 (addr->ai_family==AF_INET6?"V6":""), strerror(errno));
787         }
788 #elif defined(SO_BINDANY)
789         if (transparent &&
790             setsockopt(s, SOL_SOCKET, SO_BINDANY, (void*)&on, (socklen_t)
791             sizeof(on)) < 0) {
792                 log_warn("setsockopt(.. SO_BINDANY ..) failed: %s",
793                 strerror(errno));
794         }
795 #endif /* IP_TRANSPARENT || IP_BINDANY || SO_BINDANY */
796         if(
797 #ifdef HAVE_SYSTEMD
798                 !got_fd_from_systemd &&
799 #endif
800         bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
801 #ifndef USE_WINSOCK
802                 /* detect freebsd jail with no ipv6 permission */
803                 if(addr->ai_family==AF_INET6 && errno==EINVAL)
804                         *noproto = 1;
805                 else {
806                         log_err_addr("can't bind socket", strerror(errno),
807                                 (struct sockaddr_storage*)addr->ai_addr,
808                                 addr->ai_addrlen);
809                 }
810                 close(s);
811 #else
812                 log_err_addr("can't bind socket", 
813                         wsa_strerror(WSAGetLastError()),
814                         (struct sockaddr_storage*)addr->ai_addr,
815                         addr->ai_addrlen);
816                 closesocket(s);
817 #endif
818                 return -1;
819         }
820         if(!fd_set_nonblock(s)) {
821 #ifndef USE_WINSOCK
822                 close(s);
823 #else
824                 closesocket(s);
825 #endif
826                 return -1;
827         }
828         if(listen(s, TCP_BACKLOG) == -1) {
829 #ifndef USE_WINSOCK
830                 log_err("can't listen: %s", strerror(errno));
831                 close(s);
832 #else
833                 log_err("can't listen: %s", wsa_strerror(WSAGetLastError()));
834                 closesocket(s);
835 #endif
836                 return -1;
837         }
838 #ifdef USE_TCP_FASTOPEN
839         /* qlen specifies how many outstanding TFO requests to allow. Limit is a defense
840            against IP spoofing attacks as suggested in RFC7413 */
841 #ifdef __APPLE__
842         /* OS X implementation only supports qlen of 1 via this call. Actual
843            value is configured by the net.inet.tcp.fastopen_backlog kernel parm. */
844         qlen = 1;
845 #else
846         /* 5 is recommended on linux */
847         qlen = 5;
848 #endif
849         if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, 
850                   sizeof(qlen))) == -1 ) {
851 #ifdef ENOPROTOOPT
852                 /* squelch ENOPROTOOPT: freebsd server mode with kernel support
853                    disabled, except when verbosity enabled for debugging */
854                 if(errno != ENOPROTOOPT || verbosity >= 3) {
855 #endif
856                   if(errno == EPERM) {
857                         log_warn("Setting TCP Fast Open as server failed: %s ; this could likely be because sysctl net.inet.tcp.fastopen.enabled, net.inet.tcp.fastopen.server_enable, or net.ipv4.tcp_fastopen is disabled", strerror(errno));
858                   } else {
859                         log_err("Setting TCP Fast Open as server failed: %s", strerror(errno));
860                   }
861 #ifdef ENOPROTOOPT
862                 }
863 #endif
864         }
865 #endif
866         return s;
867 }
868
/**
 * Create a listening stream socket in the local (unix) domain.
 * If built with systemd support and use_systemd is set, a socket handed
 * over by systemd for this path is returned instead when one is available.
 * Otherwise any old socket file at path is unlinked, and a new nonblocking
 * socket is bound to path and put in listening state.
 * @param path: file system path of the socket. It must fit in sun_path;
 *	a longer path is refused instead of being silently truncated.
 * @param noproto: set to 1 only if local sockets are not supported by
 *	this build; not touched otherwise.
 * @param use_systemd: if true, try to get the socket from systemd first.
 * @return the socket fd, or -1 on error (the error is logged).
 */
int
create_local_accept_sock(const char *path, int* noproto, int use_systemd)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;
#endif
#ifdef HAVE_SYSTEMD
	int ret;

	/* no else-block is opened here, so that the braces balance for
	 * every combination of HAVE_SYSTEMD and HAVE_SYS_UN_H */
	if (use_systemd && (ret = systemd_get_activated(AF_LOCAL, SOCK_STREAM,
		1, NULL, 0, path)) != -1)
		return ret;
#else
	(void)use_systemd;
#endif

#ifdef HAVE_SYS_UN_H
	(void)noproto; /*unused*/

	verbose(VERB_ALGO, "creating unix socket %s", path);
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (unsigned)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	if (strlcpy(usock.sun_path, path, sizeof(usock.sun_path)) >=
		sizeof(usock.sun_path)) {
		/* binding the truncated name would create a different file
		 * than the one that is unlinked below */
		log_err("Cannot create local socket %s (path too long)",
			path);
		return -1;
	}

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	return s;

err:
#ifndef USE_WINSOCK
	close(s);
#else
	closesocket(s);
#endif
	return -1;

#else /* HAVE_SYS_UN_H */
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif /* HAVE_SYS_UN_H */
}
947
948
/**
 * Create socket from getaddrinfo results.
 * Looks up ifname and port with the given hints (ai_socktype is overwritten
 * with stype) and opens a UDP socket for SOCK_DGRAM, or a TCP accept socket
 * for any other stype, on the first result.
 * *noip6 is reset to 0 and set to 1 only when the failure indicates that
 * the IPv6 protocol is not available.
 * @return the socket fd, or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port, 
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int freebind, int use_systemd)
{
	struct addrinfo *ai = NULL;
	int gai, fd, inuse, noproto;
	const char* syserr = "";

	hints->ai_socktype = stype;
	*noip6 = 0;
	gai = getaddrinfo(ifname, port, hints, &ai);
	if(gai != 0 || ai == NULL) {
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		/* only for EAI_SYSTEM does errno hold additional detail */
		if(gai == EAI_SYSTEM)
			syserr = strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s", 
			ifname?ifname:"default", port, gai_strerror(gai),
			syserr);
		return -1;
	}

	if(stype != SOCK_DGRAM) {
		fd = create_tcp_accept_sock(ai, v6only, &noproto, reuseport,
			transparent, tcp_mss, freebind, use_systemd);
		if(fd == -1 && noproto && hints->ai_family == AF_INET6)
			*noip6 = 1;
	} else {
		verbose_print_addr(ai);
		fd = create_udp_sock(ai->ai_family, ai->ai_socktype,
			(struct sockaddr*)ai->ai_addr, ai->ai_addrlen,
			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
			reuseport, transparent, freebind, use_systemd);
		if(fd == -1) {
			if(inuse)
				log_err("bind: address already in use");
			else if(noproto && hints->ai_family == AF_INET6)
				*noip6 = 1;
		}
	}
	freeaddrinfo(ai);
	return fd;
}
999
/**
 * Make socket, but first see if ifname carries a port override.
 * An ifname of the form "address@port" is split at the '@' and the part
 * after it replaces the port argument; otherwise ifname and port are
 * passed on unchanged. On a too long address or port part, an error is
 * logged, *noip6 is set to 0 and -1 is returned.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port, 
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss, int freebind, int use_systemd)
{
	char portbuf[16];
	char addrbuf[128];
	size_t addrlen;
	const char* at = strchr(ifname, '@');

	if(!at) {
		/* plain interface name, use the port as given */
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport, transparent, tcp_mss, freebind,
			use_systemd);
	}

	/* override port with ifspec@port */
	addrlen = (size_t)(at - ifname);
	if(addrlen >= sizeof(addrbuf)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	if(strlen(at+1) >= sizeof(portbuf)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	/* copies the addrlen characters before the '@' and terminates */
	(void)strlcpy(addrbuf, ifname, addrlen+1);
	(void)strlcpy(portbuf, at+1, sizeof(portbuf));
	return make_sock(stype, addrbuf, portbuf, hints, v6only, noip6,
		rcv, snd, reuseport, transparent, tcp_mss, freebind,
		use_systemd);
}
1031
1032 /**
1033  * Add port to open ports list.
1034  * @param list: list head. changed.
1035  * @param s: fd.
1036  * @param ftype: if fd is UDP.
1037  * @return false on failure. list in unchanged then.
1038  */
1039 static int
1040 port_insert(struct listen_port** list, int s, enum listen_type ftype)
1041 {
1042         struct listen_port* item = (struct listen_port*)malloc(
1043                 sizeof(struct listen_port));
1044         if(!item)
1045                 return 0;
1046         item->next = *list;
1047         item->fd = s;
1048         item->ftype = ftype;
1049         *list = item;
1050         return 1;
1051 }
1052
/**
 * Set fd to receive packet info with incoming packets.
 * For AF_INET6 it uses IPV6_RECVPKTINFO or else IPV6_PKTINFO, for AF_INET
 * it uses IP_PKTINFO or else IP_RECVDSTADDR, depending on what is defined
 * at compile time. Other families are left alone.
 * @return false if the option could not be set or no suitable option
 *	exists for the family (logged); true otherwise.
 */
static int
set_recvpktinfo(int s, int family) 
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int enable = 1;
#else
	(void)s;
#endif
	switch(family) {
	case AF_INET6:
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
			"disable interface-automatic or do-ip6 in config");
		return 0;
#endif /* IPV6_RECVPKTINFO or IPV6_PKTINFO */

	case AF_INET:
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic or do-ip4 in config");
		return 0;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */

	default:
		/* nothing to set for other address families */
		break;
	}
	return 1;
}
1107
1108 /** see if interface is ssl, its port number == the ssl port number */
1109 static int
1110 if_is_ssl(const char* ifname, const char* port, int ssl_port,
1111         struct config_strlist* tls_additional_port)
1112 {
1113         struct config_strlist* s;
1114         char* p = strchr(ifname, '@');
1115         if(!p && atoi(port) == ssl_port)
1116                 return 1;
1117         if(p && atoi(p+1) == ssl_port)
1118                 return 1;
1119         for(s = tls_additional_port; s; s = s->next) {
1120                 if(p && atoi(p+1) == atoi(s->str))
1121                         return 1;
1122                 if(!p && atoi(port) == atoi(s->str))
1123                         return 1;
1124         }
1125         return 0;
1126 }
1127
1128 /**
1129  * Helper for ports_open. Creates one interface (or NULL for default).
1130  * @param ifname: The interface ip address.
1131  * @param do_auto: use automatic interface detection.
1132  *      If enabled, then ifname must be the wildcard name.
1133  * @param do_udp: if udp should be used.
1134  * @param do_tcp: if udp should be used.
1135  * @param hints: for getaddrinfo. family and flags have to be set by caller.
1136  * @param port: Port number to use (as string).
1137  * @param list: list of open ports, appended to, changed to point to list head.
1138  * @param rcv: receive buffer size for UDP
1139  * @param snd: send buffer size for UDP
1140  * @param ssl_port: ssl service port number
1141  * @param tls_additional_port: list of additional ssl service port numbers.
1142  * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
1143  *      set to false on exit if reuseport failed due to no kernel support.
1144  * @param transparent: set IP_TRANSPARENT socket option.
1145  * @param tcp_mss: maximum segment size of tcp socket. default if zero.
1146  * @param freebind: set IP_FREEBIND socket option.
1147  * @param use_systemd: if true, fetch sockets from systemd.
1148  * @param dnscrypt_port: dnscrypt service port number
1149  * @return: returns false on error.
1150  */
1151 static int
1152 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, 
1153         struct addrinfo *hints, const char* port, struct listen_port** list,
1154         size_t rcv, size_t snd, int ssl_port,
1155         struct config_strlist* tls_additional_port, int* reuseport,
1156         int transparent, int tcp_mss, int freebind, int use_systemd,
1157         int dnscrypt_port)
1158 {
1159         int s, noip6=0;
1160 #ifdef USE_DNSCRYPT
1161         int is_dnscrypt = ((strchr(ifname, '@') && 
1162                         atoi(strchr(ifname, '@')+1) == dnscrypt_port) ||
1163                         (!strchr(ifname, '@') && atoi(port) == dnscrypt_port));
1164 #else
1165         int is_dnscrypt = 0;
1166         (void)dnscrypt_port;
1167 #endif
1168
1169         if(!do_udp && !do_tcp)
1170                 return 0;
1171         if(do_auto) {
1172                 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 
1173                         &noip6, rcv, snd, reuseport, transparent,
1174                         tcp_mss, freebind, use_systemd)) == -1) {
1175                         if(noip6) {
1176                                 log_warn("IPv6 protocol not available");
1177                                 return 1;
1178                         }
1179                         return 0;
1180                 }
1181                 /* getting source addr packet info is highly non-portable */
1182                 if(!set_recvpktinfo(s, hints->ai_family)) {
1183 #ifndef USE_WINSOCK
1184                         close(s);
1185 #else
1186                         closesocket(s);
1187 #endif
1188                         return 0;
1189                 }
1190                 if(!port_insert(list, s,
1191                    is_dnscrypt?listen_type_udpancil_dnscrypt:listen_type_udpancil)) {
1192 #ifndef USE_WINSOCK
1193                         close(s);
1194 #else
1195                         closesocket(s);
1196 #endif
1197                         return 0;
1198                 }
1199         } else if(do_udp) {
1200                 /* regular udp socket */
1201                 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 
1202                         &noip6, rcv, snd, reuseport, transparent,
1203                         tcp_mss, freebind, use_systemd)) == -1) {
1204                         if(noip6) {
1205                                 log_warn("IPv6 protocol not available");
1206                                 return 1;
1207                         }
1208                         return 0;
1209                 }
1210                 if(!port_insert(list, s,
1211                    is_dnscrypt?listen_type_udp_dnscrypt:listen_type_udp)) {
1212 #ifndef USE_WINSOCK
1213                         close(s);
1214 #else
1215                         closesocket(s);
1216 #endif
1217                         return 0;
1218                 }
1219         }
1220         if(do_tcp) {
1221                 int is_ssl = if_is_ssl(ifname, port, ssl_port,
1222                         tls_additional_port);
1223                 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, 
1224                         &noip6, 0, 0, reuseport, transparent, tcp_mss,
1225                         freebind, use_systemd)) == -1) {
1226                         if(noip6) {
1227                                 /*log_warn("IPv6 protocol not available");*/
1228                                 return 1;
1229                         }
1230                         return 0;
1231                 }
1232                 if(is_ssl)
1233                         verbose(VERB_ALGO, "setup TCP for SSL service");
1234                 if(!port_insert(list, s, is_ssl?listen_type_ssl:
1235                         (is_dnscrypt?listen_type_tcp_dnscrypt:listen_type_tcp))) {
1236 #ifndef USE_WINSOCK
1237                         close(s);
1238 #else
1239                         closesocket(s);
1240 #endif
1241                         return 0;
1242                 }
1243         }
1244         return 1;
1245 }
1246
1247 /** 
1248  * Add items to commpoint list in front.
1249  * @param c: commpoint to add.
1250  * @param front: listen struct.
1251  * @return: false on failure.
1252  */
1253 static int
1254 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
1255 {
1256         struct listen_list* item = (struct listen_list*)malloc(
1257                 sizeof(struct listen_list));
1258         if(!item)
1259                 return 0;
1260         item->com = c;
1261         item->next = front->cps;
1262         front->cps = item;
1263         return 1;
1264 }
1265
1266 struct listen_dnsport* 
1267 listen_create(struct comm_base* base, struct listen_port* ports,
1268         size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
1269         struct tcl_list* tcp_conn_limit, void* sslctx,
1270         struct dt_env* dtenv, comm_point_callback_type* cb, void *cb_arg)
1271 {
1272         struct listen_dnsport* front = (struct listen_dnsport*)
1273                 malloc(sizeof(struct listen_dnsport));
1274         if(!front)
1275                 return NULL;
1276         front->cps = NULL;
1277         front->udp_buff = sldns_buffer_new(bufsize);
1278 #ifdef USE_DNSCRYPT
1279         front->dnscrypt_udp_buff = NULL;
1280 #endif
1281         if(!front->udp_buff) {
1282                 free(front);
1283                 return NULL;
1284         }
1285         if(!stream_wait_lock_inited) {
1286                 lock_basic_init(&stream_wait_count_lock);
1287                 stream_wait_lock_inited = 1;
1288         }
1289
1290         /* create comm points as needed */
1291         while(ports) {
1292                 struct comm_point* cp = NULL;
1293                 if(ports->ftype == listen_type_udp ||
1294                    ports->ftype == listen_type_udp_dnscrypt)
1295                         cp = comm_point_create_udp(base, ports->fd, 
1296                                 front->udp_buff, cb, cb_arg);
1297                 else if(ports->ftype == listen_type_tcp ||
1298                                 ports->ftype == listen_type_tcp_dnscrypt)
1299                         cp = comm_point_create_tcp(base, ports->fd, 
1300                                 tcp_accept_count, tcp_idle_timeout,
1301                                 tcp_conn_limit, bufsize, front->udp_buff,
1302                                 cb, cb_arg);
1303                 else if(ports->ftype == listen_type_ssl) {
1304                         cp = comm_point_create_tcp(base, ports->fd, 
1305                                 tcp_accept_count, tcp_idle_timeout,
1306                                 tcp_conn_limit, bufsize, front->udp_buff,
1307                                 cb, cb_arg);
1308                         cp->ssl = sslctx;
1309                 } else if(ports->ftype == listen_type_udpancil ||
1310                                   ports->ftype == listen_type_udpancil_dnscrypt)
1311                         cp = comm_point_create_udp_ancil(base, ports->fd, 
1312                                 front->udp_buff, cb, cb_arg);
1313                 if(!cp) {
1314                         log_err("can't create commpoint");      
1315                         listen_delete(front);
1316                         return NULL;
1317                 }
1318                 cp->dtenv = dtenv;
1319                 cp->do_not_close = 1;
1320 #ifdef USE_DNSCRYPT
1321                 if (ports->ftype == listen_type_udp_dnscrypt ||
1322                         ports->ftype == listen_type_tcp_dnscrypt ||
1323                         ports->ftype == listen_type_udpancil_dnscrypt) {
1324                         cp->dnscrypt = 1;
1325                         cp->dnscrypt_buffer = sldns_buffer_new(bufsize);
1326                         if(!cp->dnscrypt_buffer) {
1327                                 log_err("can't alloc dnscrypt_buffer");
1328                                 comm_point_delete(cp);
1329                                 listen_delete(front);
1330                                 return NULL;
1331                         }
1332                         front->dnscrypt_udp_buff = cp->dnscrypt_buffer;
1333                 }
1334 #endif
1335                 if(!listen_cp_insert(cp, front)) {
1336                         log_err("malloc failed");
1337                         comm_point_delete(cp);
1338                         listen_delete(front);
1339                         return NULL;
1340                 }
1341                 ports = ports->next;
1342         }
1343         if(!front->cps) {
1344                 log_err("Could not open sockets to accept queries.");
1345                 listen_delete(front);
1346                 return NULL;
1347         }
1348
1349         return front;
1350 }
1351
1352 void
1353 listen_list_delete(struct listen_list* list)
1354 {
1355         struct listen_list *p = list, *pn;
1356         while(p) {
1357                 pn = p->next;
1358                 comm_point_delete(p->com);
1359                 free(p);
1360                 p = pn;
1361         }
1362 }
1363
1364 void 
1365 listen_delete(struct listen_dnsport* front)
1366 {
1367         if(!front) 
1368                 return;
1369         listen_list_delete(front->cps);
1370 #ifdef USE_DNSCRYPT
1371         if(front->dnscrypt_udp_buff &&
1372                 front->udp_buff != front->dnscrypt_udp_buff) {
1373                 sldns_buffer_free(front->dnscrypt_udp_buff);
1374         }
1375 #endif
1376         sldns_buffer_free(front->udp_buff);
1377         free(front);
1378         if(stream_wait_lock_inited) {
1379                 stream_wait_lock_inited = 0;
1380                 lock_basic_destroy(&stream_wait_count_lock);
1381         }
1382 }
1383
1384 struct listen_port* 
1385 listening_ports_open(struct config_file* cfg, int* reuseport)
1386 {
1387         struct listen_port* list = NULL;
1388         struct addrinfo hints;
1389         int i, do_ip4, do_ip6;
1390         int do_tcp, do_auto;
1391         char portbuf[32];
1392         snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1393         do_ip4 = cfg->do_ip4;
1394         do_ip6 = cfg->do_ip6;
1395         do_tcp = cfg->do_tcp;
1396         do_auto = cfg->if_automatic && cfg->do_udp;
1397         if(cfg->incoming_num_tcp == 0)
1398                 do_tcp = 0;
1399
1400         /* getaddrinfo */
1401         memset(&hints, 0, sizeof(hints));
1402         hints.ai_flags = AI_PASSIVE;
1403         /* no name lookups on our listening ports */
1404         if(cfg->num_ifs > 0)
1405                 hints.ai_flags |= AI_NUMERICHOST;
1406         hints.ai_family = AF_UNSPEC;
1407 #ifndef INET6
1408         do_ip6 = 0;
1409 #endif
1410         if(!do_ip4 && !do_ip6) {
1411                 return NULL;
1412         }
1413         /* create ip4 and ip6 ports so that return addresses are nice. */
1414         if(do_auto || cfg->num_ifs == 0) {
1415                 if(do_ip6) {
1416                         hints.ai_family = AF_INET6;
1417                         if(!ports_create_if(do_auto?"::0":"::1", 
1418                                 do_auto, cfg->do_udp, do_tcp, 
1419                                 &hints, portbuf, &list,
1420                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1421                                 cfg->ssl_port, cfg->tls_additional_port,
1422                                 reuseport, cfg->ip_transparent,
1423                                 cfg->tcp_mss, cfg->ip_freebind, cfg->use_systemd,
1424                                 cfg->dnscrypt_port)) {
1425                                 listening_ports_free(list);
1426                                 return NULL;
1427                         }
1428                 }
1429                 if(do_ip4) {
1430                         hints.ai_family = AF_INET;
1431                         if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1", 
1432                                 do_auto, cfg->do_udp, do_tcp, 
1433                                 &hints, portbuf, &list,
1434                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1435                                 cfg->ssl_port, cfg->tls_additional_port,
1436                                 reuseport, cfg->ip_transparent,
1437                                 cfg->tcp_mss, cfg->ip_freebind, cfg->use_systemd,
1438                                 cfg->dnscrypt_port)) {
1439                                 listening_ports_free(list);
1440                                 return NULL;
1441                         }
1442                 }
1443         } else for(i = 0; i<cfg->num_ifs; i++) {
1444                 if(str_is_ip6(cfg->ifs[i])) {
1445                         if(!do_ip6)
1446                                 continue;
1447                         hints.ai_family = AF_INET6;
1448                         if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp, 
1449                                 do_tcp, &hints, portbuf, &list, 
1450                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1451                                 cfg->ssl_port, cfg->tls_additional_port,
1452                                 reuseport, cfg->ip_transparent,
1453                                 cfg->tcp_mss, cfg->ip_freebind, cfg->use_systemd,
1454                                 cfg->dnscrypt_port)) {
1455                                 listening_ports_free(list);
1456                                 return NULL;
1457                         }
1458                 } else {
1459                         if(!do_ip4)
1460                                 continue;
1461                         hints.ai_family = AF_INET;
1462                         if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp, 
1463                                 do_tcp, &hints, portbuf, &list, 
1464                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1465                                 cfg->ssl_port, cfg->tls_additional_port,
1466                                 reuseport, cfg->ip_transparent,
1467                                 cfg->tcp_mss, cfg->ip_freebind, cfg->use_systemd,
1468                                 cfg->dnscrypt_port)) {
1469                                 listening_ports_free(list);
1470                                 return NULL;
1471                         }
1472                 }
1473         }
1474         return list;
1475 }
1476
1477 void listening_ports_free(struct listen_port* list)
1478 {
1479         struct listen_port* nx;
1480         while(list) {
1481                 nx = list->next;
1482                 if(list->fd != -1) {
1483 #ifndef USE_WINSOCK
1484                         close(list->fd);
1485 #else
1486                         closesocket(list->fd);
1487 #endif
1488                 }
1489                 free(list);
1490                 list = nx;
1491         }
1492 }
1493
1494 size_t listen_get_mem(struct listen_dnsport* listen)
1495 {
1496         struct listen_list* p;
1497         size_t s = sizeof(*listen) + sizeof(*listen->base) + 
1498                 sizeof(*listen->udp_buff) + 
1499                 sldns_buffer_capacity(listen->udp_buff);
1500 #ifdef USE_DNSCRYPT
1501         s += sizeof(*listen->dnscrypt_udp_buff);
1502         if(listen->udp_buff != listen->dnscrypt_udp_buff){
1503                 s += sldns_buffer_capacity(listen->dnscrypt_udp_buff);
1504         }
1505 #endif
1506         for(p = listen->cps; p; p = p->next) {
1507                 s += sizeof(*p);
1508                 s += comm_point_get_mem(p->com);
1509         }
1510         return s;
1511 }
1512
1513 void listen_stop_accept(struct listen_dnsport* listen)
1514 {
1515         /* do not stop the ones that have no tcp_free list
1516          * (they have already stopped listening) */
1517         struct listen_list* p;
1518         for(p=listen->cps; p; p=p->next) {
1519                 if(p->com->type == comm_tcp_accept &&
1520                         p->com->tcp_free != NULL) {
1521                         comm_point_stop_listening(p->com);
1522                 }
1523         }
1524 }
1525
1526 void listen_start_accept(struct listen_dnsport* listen)
1527 {
1528         /* do not start the ones that have no tcp_free list, it is no
1529          * use to listen to them because they have no free tcp handlers */
1530         struct listen_list* p;
1531         for(p=listen->cps; p; p=p->next) {
1532                 if(p->com->type == comm_tcp_accept &&
1533                         p->com->tcp_free != NULL) {
1534                         comm_point_start_listening(p->com, -1, -1);
1535                 }
1536         }
1537 }
1538
1539 struct tcp_req_info*
1540 tcp_req_info_create(struct sldns_buffer* spoolbuf)
1541 {
1542         struct tcp_req_info* req = (struct tcp_req_info*)malloc(sizeof(*req));
1543         if(!req) {
1544                 log_err("malloc failure for new stream outoforder processing structure");
1545                 return NULL;
1546         }
1547         memset(req, 0, sizeof(*req));
1548         req->spool_buffer = spoolbuf;
1549         return req;
1550 }
1551
/**
 * Delete a tcp_req_info: clear its lists, then free the structure.
 * @param req: structure to delete; NULL is accepted and ignored.
 */
void
tcp_req_info_delete(struct tcp_req_info* req)
{
	if(req != NULL) {
		tcp_req_info_clear(req);
		/* req->cp points back to the commpoint that owns this
		 * struct and called delete on us; it is not freed here.
		 * spool_buffer is the shared udp buffer, also not freed. */
		free(req);
	}
}
1562
1563 void tcp_req_info_clear(struct tcp_req_info* req)
1564 {
1565         struct tcp_req_open_item* open, *nopen;
1566         struct tcp_req_done_item* item, *nitem;
1567         if(!req) return;
1568
1569         /* free outstanding request mesh reply entries */
1570         open = req->open_req_list;
1571         while(open) {
1572                 nopen = open->next;
1573                 mesh_state_remove_reply(open->mesh, open->mesh_state, req->cp);
1574                 free(open);
1575                 open = nopen;
1576         }
1577         req->open_req_list = NULL;
1578         req->num_open_req = 0;
1579         
1580         /* free pending writable result packets */
1581         item = req->done_req_list;
1582         while(item) {
1583                 nitem = item->next;
1584                 lock_basic_lock(&stream_wait_count_lock);
1585                 stream_wait_count -= (sizeof(struct tcp_req_done_item)
1586                         +item->len);
1587                 lock_basic_unlock(&stream_wait_count_lock);
1588                 free(item->buf);
1589                 free(item);
1590                 item = nitem;
1591         }
1592         req->done_req_list = NULL;
1593         req->num_done_req = 0;
1594         req->read_is_closed = 0;
1595 }
1596
1597 void
1598 tcp_req_info_remove_mesh_state(struct tcp_req_info* req, struct mesh_state* m)
1599 {
1600         struct tcp_req_open_item* open, *prev = NULL;
1601         if(!req || !m) return;
1602         open = req->open_req_list;
1603         while(open) {
1604                 if(open->mesh_state == m) {
1605                         struct tcp_req_open_item* next;
1606                         if(prev) prev->next = open->next;
1607                         else req->open_req_list = open->next;
1608                         /* caller has to manage the mesh state reply entry */
1609                         next = open->next;
1610                         free(open);
1611                         req->num_open_req --;
1612
1613                         /* prev = prev; */
1614                         open = next;
1615                         continue;
1616                 }
1617                 prev = open;
1618                 open = open->next;
1619         }
1620 }
1621
1622 /** setup listening for read or write */
1623 static void
1624 tcp_req_info_setup_listen(struct tcp_req_info* req)
1625 {
1626         int wr = 0;
1627         int rd = 0;
1628
1629         if(req->cp->tcp_byte_count != 0) {
1630                 /* cannot change, halfway through */
1631                 return;
1632         }
1633
1634         if(!req->cp->tcp_is_reading)
1635                 wr = 1;
1636         if(req->num_open_req + req->num_done_req < TCP_MAX_REQ_SIMULTANEOUS &&
1637                 !req->read_is_closed)
1638                 rd = 1;
1639         
1640         if(wr) {
1641                 req->cp->tcp_is_reading = 0;
1642                 comm_point_stop_listening(req->cp);
1643                 comm_point_start_listening(req->cp, -1,
1644                         req->cp->tcp_timeout_msec);
1645         } else if(rd) {
1646                 req->cp->tcp_is_reading = 1;
1647                 comm_point_stop_listening(req->cp);
1648                 comm_point_start_listening(req->cp, -1,
1649                         req->cp->tcp_timeout_msec);
1650                 /* and also read it (from SSL stack buffers), so
1651                  * no event read event is expected since the remainder of
1652                  * the TLS frame is sitting in the buffers. */
1653                 req->read_again = 1;
1654         } else {
1655                 comm_point_stop_listening(req->cp);
1656                 comm_point_start_listening(req->cp, -1,
1657                         req->cp->tcp_timeout_msec);
1658                 comm_point_listen_for_rw(req->cp, 0, 0);
1659         }
1660 }
1661
1662 /** remove first item from list of pending results */
1663 static struct tcp_req_done_item*
1664 tcp_req_info_pop_done(struct tcp_req_info* req)
1665 {
1666         struct tcp_req_done_item* item;
1667         log_assert(req->num_done_req > 0 && req->done_req_list);
1668         item = req->done_req_list;
1669         lock_basic_lock(&stream_wait_count_lock);
1670         stream_wait_count -= (sizeof(struct tcp_req_done_item)+item->len);
1671         lock_basic_unlock(&stream_wait_count_lock);
1672         req->done_req_list = req->done_req_list->next;
1673         req->num_done_req --;
1674         return item;
1675 }
1676
1677 /** Send given buffer and setup to write */
1678 static void
1679 tcp_req_info_start_write_buf(struct tcp_req_info* req, uint8_t* buf,
1680         size_t len)
1681 {
1682         sldns_buffer_clear(req->cp->buffer);
1683         sldns_buffer_write(req->cp->buffer, buf, len);
1684         sldns_buffer_flip(req->cp->buffer);
1685
1686         req->cp->tcp_is_reading = 0; /* we are now writing */
1687 }
1688
1689 /** pick up the next result and start writing it to the channel */
1690 static void
1691 tcp_req_pickup_next_result(struct tcp_req_info* req)
1692 {
1693         if(req->num_done_req > 0) {
1694                 /* unlist the done item from the list of pending results */
1695                 struct tcp_req_done_item* item = tcp_req_info_pop_done(req);
1696                 tcp_req_info_start_write_buf(req, item->buf, item->len);
1697                 free(item->buf);
1698                 free(item);
1699         }
1700 }
1701
1702 /** the read channel has closed */
1703 int
1704 tcp_req_info_handle_read_close(struct tcp_req_info* req)
1705 {
1706         verbose(VERB_ALGO, "tcp channel read side closed %d", req->cp->fd);
1707         /* reset byte count for (potential) partial read */
1708         req->cp->tcp_byte_count = 0;
1709         /* if we still have results to write, pick up next and write it */
1710         if(req->num_done_req != 0) {
1711                 tcp_req_pickup_next_result(req);
1712                 tcp_req_info_setup_listen(req);
1713                 return 1;
1714         }
1715         /* if nothing to do, this closes the connection */
1716         if(req->num_open_req == 0 && req->num_done_req == 0)
1717                 return 0;
1718         /* otherwise, we must be waiting for dns resolve, wait with timeout */
1719         req->read_is_closed = 1;
1720         tcp_req_info_setup_listen(req);
1721         return 1;
1722 }
1723
1724 void
1725 tcp_req_info_handle_writedone(struct tcp_req_info* req)
1726 {
1727         /* back to reading state, we finished this write event */
1728         sldns_buffer_clear(req->cp->buffer);
1729         if(req->num_done_req == 0 && req->read_is_closed) {
1730                 /* no more to write and nothing to read, close it */
1731                 comm_point_drop_reply(&req->cp->repinfo);
1732                 return;
1733         }
1734         req->cp->tcp_is_reading = 1;
1735         /* see if another result needs writing */
1736         tcp_req_pickup_next_result(req);
1737
1738         /* see if there is more to write, if not stop_listening for writing */
1739         /* see if new requests are allowed, if so, start_listening
1740          * for reading */
1741         tcp_req_info_setup_listen(req);
1742 }
1743
1744 void
1745 tcp_req_info_handle_readdone(struct tcp_req_info* req)
1746 {
1747         struct comm_point* c = req->cp;
1748
1749         /* we want to read up several requests, unless there are
1750          * pending answers */
1751
1752         req->is_drop = 0;
1753         req->is_reply = 0;
1754         req->in_worker_handle = 1;
1755         sldns_buffer_set_limit(req->spool_buffer, 0);
1756         /* handle the current request */
1757         /* this calls the worker handle request routine that could give
1758          * a cache response, or localdata response, or drop the reply,
1759          * or schedule a mesh entry for later */
1760         fptr_ok(fptr_whitelist_comm_point(c->callback));
1761         if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
1762                 req->in_worker_handle = 0;
1763                 /* there is an answer, put it up.  It is already in the
1764                  * c->buffer, just send it. */
1765                 /* since we were just reading a query, the channel is
1766                  * clear to write to */
1767         send_it:
1768                 c->tcp_is_reading = 0;
1769                 comm_point_stop_listening(c);
1770                 comm_point_start_listening(c, -1, c->tcp_timeout_msec);
1771                 return;
1772         }
1773         req->in_worker_handle = 0;
1774         /* it should be waiting in the mesh for recursion.
1775          * If mesh failed to add a new entry and called commpoint_drop_reply. 
1776          * Then the mesh state has been cleared. */
1777         if(req->is_drop) {
1778                 /* the reply has been dropped, stream has been closed. */
1779                 return;
1780         }
1781         /* If mesh failed(mallocfail) and called commpoint_send_reply with
1782          * something like servfail then we pick up that reply below. */
1783         if(req->is_reply) {
1784                 goto send_it;
1785         }
1786
1787         sldns_buffer_clear(c->buffer);
1788         /* if pending answers, pick up an answer and start sending it */
1789         tcp_req_pickup_next_result(req);
1790
1791         /* if answers pending, start sending answers */
1792         /* read more requests if we can have more requests */
1793         tcp_req_info_setup_listen(req);
1794 }
1795
1796 int
1797 tcp_req_info_add_meshstate(struct tcp_req_info* req,
1798         struct mesh_area* mesh, struct mesh_state* m)
1799 {
1800         struct tcp_req_open_item* item;
1801         log_assert(req && mesh && m);
1802         item = (struct tcp_req_open_item*)malloc(sizeof(*item));
1803         if(!item) return 0;
1804         item->next = req->open_req_list;
1805         item->mesh = mesh;
1806         item->mesh_state = m;
1807         req->open_req_list = item;
1808         req->num_open_req++;
1809         return 1;
1810 }
1811
1812 /** Add a result to the result list.  At the end. */
1813 static int
1814 tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len)
1815 {
1816         struct tcp_req_done_item* last = NULL;
1817         struct tcp_req_done_item* item;
1818         size_t space;
1819
1820         /* see if we have space */
1821         space = sizeof(struct tcp_req_done_item) + len;
1822         lock_basic_lock(&stream_wait_count_lock);
1823         if(stream_wait_count + space > stream_wait_max) {
1824                 lock_basic_unlock(&stream_wait_count_lock);
1825                 verbose(VERB_ALGO, "drop stream reply, no space left, in stream-wait-size");
1826                 return 0;
1827         }
1828         stream_wait_count += space;
1829         lock_basic_unlock(&stream_wait_count_lock);
1830
1831         /* find last element */
1832         last = req->done_req_list;
1833         while(last && last->next)
1834                 last = last->next;
1835         
1836         /* create new element */
1837         item = (struct tcp_req_done_item*)malloc(sizeof(*item));
1838         if(!item) {
1839                 log_err("malloc failure, for stream result list");
1840                 return 0;
1841         }
1842         item->next = NULL;
1843         item->len = len;
1844         item->buf = memdup(buf, len);
1845         if(!item->buf) {
1846                 free(item);
1847                 log_err("malloc failure, adding reply to stream result list");
1848                 return 0;
1849         }
1850
1851         /* link in */
1852         if(last) last->next = item;
1853         else req->done_req_list = item;
1854         req->num_done_req++;
1855         return 1;
1856 }
1857
1858 void
1859 tcp_req_info_send_reply(struct tcp_req_info* req)
1860 {
1861         if(req->in_worker_handle) {
1862                 /* reply from mesh is in the spool_buffer */
1863                 /* copy now, so that the spool buffer is free for other tasks
1864                  * before the callback is done */
1865                 sldns_buffer_clear(req->cp->buffer);
1866                 sldns_buffer_write(req->cp->buffer,
1867                         sldns_buffer_begin(req->spool_buffer),
1868                         sldns_buffer_limit(req->spool_buffer));
1869                 sldns_buffer_flip(req->cp->buffer);
1870                 req->is_reply = 1;
1871                 return;
1872         }
1873         /* now that the query has been handled, that mesh_reply entry
1874          * should be removed, from the tcp_req_info list,
1875          * the mesh state cleanup removes then with region_cleanup and
1876          * replies_sent true. */
1877         /* see if we can send it straight away (we are not doing
1878          * anything else).  If so, copy to buffer and start */
1879         if(req->cp->tcp_is_reading && req->cp->tcp_byte_count == 0) {
1880                 /* buffer is free, and was ready to read new query into,
1881                  * but we are now going to use it to send this answer */
1882                 tcp_req_info_start_write_buf(req,
1883                         sldns_buffer_begin(req->spool_buffer),
1884                         sldns_buffer_limit(req->spool_buffer));
1885                 /* switch to listen to write events */
1886                 comm_point_stop_listening(req->cp);
1887                 comm_point_start_listening(req->cp, -1,
1888                         req->cp->tcp_timeout_msec);
1889                 return;
1890         }
1891         /* queue up the answer behind the others already pending */
1892         if(!tcp_req_info_add_result(req, sldns_buffer_begin(req->spool_buffer),
1893                 sldns_buffer_limit(req->spool_buffer))) {
1894                 /* drop the connection, we are out of resources */
1895                 comm_point_drop_reply(&req->cp->repinfo);
1896         }
1897 }
1898
1899 size_t tcp_req_info_get_stream_buffer_size(void)
1900 {
1901         size_t s;
1902         if(!stream_wait_lock_inited)
1903                 return stream_wait_count;
1904         lock_basic_lock(&stream_wait_count_lock);
1905         s = stream_wait_count;
1906         lock_basic_unlock(&stream_wait_count_lock);
1907         return s;
1908 }