CyberLeo.Net >> Repos - FreeBSD/releng/10.3.git/blob - contrib/unbound/services/listen_dnsport.c
- Copy stable/10@296371 to releng/10.3 in preparation for 10.3-RC1
[FreeBSD/releng/10.3.git] / contrib / unbound / services / listen_dnsport.c
1 /*
2  * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file has functions to get queries from clients.
40  */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 #  include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #include "services/listen_dnsport.h"
47 #include "services/outside_network.h"
48 #include "util/netevent.h"
49 #include "util/log.h"
50 #include "util/config_file.h"
51 #include "util/net_help.h"
52 #include "sldns/sbuffer.h"
53
54 #ifdef HAVE_NETDB_H
55 #include <netdb.h>
56 #endif
57 #include <fcntl.h>
58
59 #ifdef HAVE_SYS_UN_H
60 #include <sys/un.h>
61 #endif
62
63 /** number of queued TCP connections for listen() */
64 #define TCP_BACKLOG 256 
65
66 /**
67  * Debug print of the getaddrinfo returned address.
68  * @param addr: the address returned.
69  */
70 static void
71 verbose_print_addr(struct addrinfo *addr)
72 {
73         if(verbosity >= VERB_ALGO) {
74                 char buf[100];
75                 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
76 #ifdef INET6
77                 if(addr->ai_family == AF_INET6)
78                         sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
79                                 sin6_addr;
80 #endif /* INET6 */
81                 if(inet_ntop(addr->ai_family, sinaddr, buf,
82                         (socklen_t)sizeof(buf)) == 0) {
83                         (void)strlcpy(buf, "(null)", sizeof(buf));
84                 }
85                 buf[sizeof(buf)-1] = 0;
86                 verbose(VERB_ALGO, "creating %s%s socket %s %d", 
87                         addr->ai_socktype==SOCK_DGRAM?"udp":
88                         addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
89                         addr->ai_family==AF_INET?"4":
90                         addr->ai_family==AF_INET6?"6":
91                         "_otherfam", buf, 
92                         ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
93         }
94 }
95
96 int
97 create_udp_sock(int family, int socktype, struct sockaddr* addr,
98         socklen_t addrlen, int v6only, int* inuse, int* noproto,
99         int rcv, int snd, int listen, int* reuseport, int transparent)
100 {
101         int s;
102 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)  || defined(IP_TRANSPARENT)
103         int on=1;
104 #endif
105 #ifdef IPV6_MTU
106         int mtu = IPV6_MIN_MTU;
107 #endif
108 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
109         (void)rcv;
110 #endif
111 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
112         (void)snd;
113 #endif
114 #ifndef IPV6_V6ONLY
115         (void)v6only;
116 #endif
117 #ifndef IP_TRANSPARENT
118         (void)transparent;
119 #endif
120         if((s = socket(family, socktype, 0)) == -1) {
121                 *inuse = 0;
122 #ifndef USE_WINSOCK
123                 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
124                         *noproto = 1;
125                         return -1;
126                 }
127                 log_err("can't create socket: %s", strerror(errno));
128 #else
129                 if(WSAGetLastError() == WSAEAFNOSUPPORT || 
130                         WSAGetLastError() == WSAEPROTONOSUPPORT) {
131                         *noproto = 1;
132                         return -1;
133                 }
134                 log_err("can't create socket: %s", 
135                         wsa_strerror(WSAGetLastError()));
136 #endif
137                 *noproto = 0;
138                 return -1;
139         }
140         if(listen) {
141 #ifdef SO_REUSEADDR
142                 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 
143                         (socklen_t)sizeof(on)) < 0) {
144 #ifndef USE_WINSOCK
145                         log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
146                                 strerror(errno));
147                         if(errno != ENOSYS) {
148                                 close(s);
149                                 *noproto = 0;
150                                 *inuse = 0;
151                                 return -1;
152                         }
153 #else
154                         log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
155                                 wsa_strerror(WSAGetLastError()));
156                         closesocket(s);
157                         *noproto = 0;
158                         *inuse = 0;
159                         return -1;
160 #endif
161                 }
162 #endif /* SO_REUSEADDR */
163 #ifdef SO_REUSEPORT
164                 /* try to set SO_REUSEPORT so that incoming
165                  * queries are distributed evenly among the receiving threads.
166                  * Each thread must have its own socket bound to the same port,
167                  * with SO_REUSEPORT set on each socket.
168                  */
169                 if (reuseport && *reuseport &&
170                     setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
171                         (socklen_t)sizeof(on)) < 0) {
172 #ifdef ENOPROTOOPT
173                         if(errno != ENOPROTOOPT || verbosity >= 3)
174                                 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
175                                         strerror(errno));
176 #endif
177                         /* this option is not essential, we can continue */
178                         *reuseport = 0;
179                 }
180 #else
181                 (void)reuseport;
182 #endif /* defined(SO_REUSEPORT) */
183 #ifdef IP_TRANSPARENT
184                 if (transparent &&
185                     setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
186                     (socklen_t)sizeof(on)) < 0) {
187                         log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
188                         strerror(errno));
189                 }
190 #endif /* IP_TRANSPARENT */
191         }
192         if(rcv) {
193 #ifdef SO_RCVBUF
194                 int got;
195                 socklen_t slen = (socklen_t)sizeof(got);
196 #  ifdef SO_RCVBUFFORCE
197                 /* Linux specific: try to use root permission to override
198                  * system limits on rcvbuf. The limit is stored in 
199                  * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
200                 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, 
201                         (socklen_t)sizeof(rcv)) < 0) {
202                         if(errno != EPERM) {
203 #    ifndef USE_WINSOCK
204                                 log_err("setsockopt(..., SO_RCVBUFFORCE, "
205                                         "...) failed: %s", strerror(errno));
206                                 close(s);
207 #    else
208                                 log_err("setsockopt(..., SO_RCVBUFFORCE, "
209                                         "...) failed: %s", 
210                                         wsa_strerror(WSAGetLastError()));
211                                 closesocket(s);
212 #    endif
213                                 *noproto = 0;
214                                 *inuse = 0;
215                                 return -1;
216                         }
217 #  endif /* SO_RCVBUFFORCE */
218                         if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, 
219                                 (socklen_t)sizeof(rcv)) < 0) {
220 #  ifndef USE_WINSOCK
221                                 log_err("setsockopt(..., SO_RCVBUF, "
222                                         "...) failed: %s", strerror(errno));
223                                 close(s);
224 #  else
225                                 log_err("setsockopt(..., SO_RCVBUF, "
226                                         "...) failed: %s", 
227                                         wsa_strerror(WSAGetLastError()));
228                                 closesocket(s);
229 #  endif
230                                 *noproto = 0;
231                                 *inuse = 0;
232                                 return -1;
233                         }
234                         /* check if we got the right thing or if system
235                          * reduced to some system max.  Warn if so */
236                         if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, 
237                                 &slen) >= 0 && got < rcv/2) {
238                                 log_warn("so-rcvbuf %u was not granted. "
239                                         "Got %u. To fix: start with "
240                                         "root permissions(linux) or sysctl "
241                                         "bigger net.core.rmem_max(linux) or "
242                                         "kern.ipc.maxsockbuf(bsd) values.",
243                                         (unsigned)rcv, (unsigned)got);
244                         }
245 #  ifdef SO_RCVBUFFORCE
246                 }
247 #  endif
248 #endif /* SO_RCVBUF */
249         }
250         /* first do RCVBUF as the receive buffer is more important */
251         if(snd) {
252 #ifdef SO_SNDBUF
253                 int got;
254                 socklen_t slen = (socklen_t)sizeof(got);
255 #  ifdef SO_SNDBUFFORCE
256                 /* Linux specific: try to use root permission to override
257                  * system limits on sndbuf. The limit is stored in 
258                  * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
259                 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, 
260                         (socklen_t)sizeof(snd)) < 0) {
261                         if(errno != EPERM) {
262 #    ifndef USE_WINSOCK
263                                 log_err("setsockopt(..., SO_SNDBUFFORCE, "
264                                         "...) failed: %s", strerror(errno));
265                                 close(s);
266 #    else
267                                 log_err("setsockopt(..., SO_SNDBUFFORCE, "
268                                         "...) failed: %s", 
269                                         wsa_strerror(WSAGetLastError()));
270                                 closesocket(s);
271 #    endif
272                                 *noproto = 0;
273                                 *inuse = 0;
274                                 return -1;
275                         }
276 #  endif /* SO_SNDBUFFORCE */
277                         if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 
278                                 (socklen_t)sizeof(snd)) < 0) {
279 #  ifndef USE_WINSOCK
280                                 log_err("setsockopt(..., SO_SNDBUF, "
281                                         "...) failed: %s", strerror(errno));
282                                 close(s);
283 #  else
284                                 log_err("setsockopt(..., SO_SNDBUF, "
285                                         "...) failed: %s", 
286                                         wsa_strerror(WSAGetLastError()));
287                                 closesocket(s);
288 #  endif
289                                 *noproto = 0;
290                                 *inuse = 0;
291                                 return -1;
292                         }
293                         /* check if we got the right thing or if system
294                          * reduced to some system max.  Warn if so */
295                         if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, 
296                                 &slen) >= 0 && got < snd/2) {
297                                 log_warn("so-sndbuf %u was not granted. "
298                                         "Got %u. To fix: start with "
299                                         "root permissions(linux) or sysctl "
300                                         "bigger net.core.wmem_max(linux) or "
301                                         "kern.ipc.maxsockbuf(bsd) values.",
302                                         (unsigned)snd, (unsigned)got);
303                         }
304 #  ifdef SO_SNDBUFFORCE
305                 }
306 #  endif
307 #endif /* SO_SNDBUF */
308         }
309         if(family == AF_INET6) {
310 # if defined(IPV6_V6ONLY)
311                 if(v6only) {
312                         int val=(v6only==2)?0:1;
313                         if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 
314                                 (void*)&val, (socklen_t)sizeof(val)) < 0) {
315 #ifndef USE_WINSOCK
316                                 log_err("setsockopt(..., IPV6_V6ONLY"
317                                         ", ...) failed: %s", strerror(errno));
318                                 close(s);
319 #else
320                                 log_err("setsockopt(..., IPV6_V6ONLY"
321                                         ", ...) failed: %s", 
322                                         wsa_strerror(WSAGetLastError()));
323                                 closesocket(s);
324 #endif
325                                 *noproto = 0;
326                                 *inuse = 0;
327                                 return -1;
328                         }
329                 }
330 # endif
331 # if defined(IPV6_USE_MIN_MTU)
332                 /*
333                  * There is no fragmentation of IPv6 datagrams
334                  * during forwarding in the network. Therefore
335                  * we do not send UDP datagrams larger than
336                  * the minimum IPv6 MTU of 1280 octets. The
337                  * EDNS0 message length can be larger if the
338                  * network stack supports IPV6_USE_MIN_MTU.
339                  */
340                 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
341                         (void*)&on, (socklen_t)sizeof(on)) < 0) {
342 #  ifndef USE_WINSOCK
343                         log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
344                                 "...) failed: %s", strerror(errno));
345                         close(s);
346 #  else
347                         log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
348                                 "...) failed: %s", 
349                                 wsa_strerror(WSAGetLastError()));
350                         closesocket(s);
351 #  endif
352                         *noproto = 0;
353                         *inuse = 0;
354                         return -1;
355                 }
356 # elif defined(IPV6_MTU)
357                 /*
358                  * On Linux, to send no larger than 1280, the PMTUD is
359                  * disabled by default for datagrams anyway, so we set
360                  * the MTU to use.
361                  */
362                 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
363                         (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
364 #  ifndef USE_WINSOCK
365                         log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 
366                                 strerror(errno));
367                         close(s);
368 #  else
369                         log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 
370                                 wsa_strerror(WSAGetLastError()));
371                         closesocket(s);
372 #  endif
373                         *noproto = 0;
374                         *inuse = 0;
375                         return -1;
376                 }
377 # endif /* IPv6 MTU */
378         } else if(family == AF_INET) {
379 #  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
380 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
381  * PMTU information is not accepted, but fragmentation is allowed
382  * if and only if the packet size exceeds the outgoing interface MTU
383  * (and also uses the interface mtu to determine the size of the packets).
384  * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
385  * FreeBSD already has same semantics without setting the option. */
386                 int omit_set = 0;
387                 int action;
388 #   if defined(IP_PMTUDISC_OMIT)
389                 action = IP_PMTUDISC_OMIT;
390                 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 
391                         &action, (socklen_t)sizeof(action)) < 0) {
392
393                         if (errno != EINVAL) {
394                                 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
395                                         strerror(errno));
396
397 #    ifndef USE_WINSOCK
398                                 close(s);
399 #    else
400                                 closesocket(s);
401 #    endif
402                                 *noproto = 0;
403                                 *inuse = 0;
404                                 return -1;
405                         }
406                 }
407                 else
408                 {
409                     omit_set = 1;
410                 }
411 #   endif
412                 if (omit_set == 0) {
413                         action = IP_PMTUDISC_DONT;
414                         if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
415                                 &action, (socklen_t)sizeof(action)) < 0) {
416                                 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
417                                         strerror(errno));
418 #    ifndef USE_WINSOCK
419                                 close(s);
420 #    else
421                                 closesocket(s);
422 #    endif
423                                 *noproto = 0;
424                                 *inuse = 0;
425                                 return -1;
426                         }
427                 }
428 #  elif defined(IP_DONTFRAG)
429                 int off = 0;
430                 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, 
431                         &off, (socklen_t)sizeof(off)) < 0) {
432                         log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
433                                 strerror(errno));
434 #    ifndef USE_WINSOCK
435                         close(s);
436 #    else
437                         closesocket(s);
438 #    endif
439                         *noproto = 0;
440                         *inuse = 0;
441                         return -1;
442                 }
443 #  endif /* IPv4 MTU */
444         }
445         if(bind(s, (struct sockaddr*)addr, addrlen) != 0) {
446                 *noproto = 0;
447                 *inuse = 0;
448 #ifndef USE_WINSOCK
449 #ifdef EADDRINUSE
450                 *inuse = (errno == EADDRINUSE);
451                 /* detect freebsd jail with no ipv6 permission */
452                 if(family==AF_INET6 && errno==EINVAL)
453                         *noproto = 1;
454                 else if(errno != EADDRINUSE) {
455                         log_err_addr("can't bind socket", strerror(errno),
456                                 (struct sockaddr_storage*)addr, addrlen);
457                 }
458 #endif /* EADDRINUSE */
459                 close(s);
460 #else /* USE_WINSOCK */
461                 if(WSAGetLastError() != WSAEADDRINUSE &&
462                         WSAGetLastError() != WSAEADDRNOTAVAIL) {
463                         log_err_addr("can't bind socket", 
464                                 wsa_strerror(WSAGetLastError()),
465                                 (struct sockaddr_storage*)addr, addrlen);
466                 }
467                 closesocket(s);
468 #endif
469                 return -1;
470         }
471         if(!fd_set_nonblock(s)) {
472                 *noproto = 0;
473                 *inuse = 0;
474 #ifndef USE_WINSOCK
475                 close(s);
476 #else
477                 closesocket(s);
478 #endif
479                 return -1;
480         }
481         return s;
482 }
483
484 int
485 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
486         int* reuseport, int transparent)
487 {
488         int s;
489 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT)
490         int on = 1;
491 #endif
492 #ifndef IP_TRANSPARENT
493         (void)transparent;
494 #endif
495         verbose_print_addr(addr);
496         *noproto = 0;
497         if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
498 #ifndef USE_WINSOCK
499                 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
500                         *noproto = 1;
501                         return -1;
502                 }
503                 log_err("can't create socket: %s", strerror(errno));
504 #else
505                 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
506                         WSAGetLastError() == WSAEPROTONOSUPPORT) {
507                         *noproto = 1;
508                         return -1;
509                 }
510                 log_err("can't create socket: %s", 
511                         wsa_strerror(WSAGetLastError()));
512 #endif
513                 return -1;
514         }
515 #ifdef SO_REUSEADDR
516         if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 
517                 (socklen_t)sizeof(on)) < 0) {
518 #ifndef USE_WINSOCK
519                 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
520                         strerror(errno));
521                 close(s);
522 #else
523                 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
524                         wsa_strerror(WSAGetLastError()));
525                 closesocket(s);
526 #endif
527                 return -1;
528         }
529 #endif /* SO_REUSEADDR */
530 #ifdef SO_REUSEPORT
531         /* try to set SO_REUSEPORT so that incoming
532          * connections are distributed evenly among the receiving threads.
533          * Each thread must have its own socket bound to the same port,
534          * with SO_REUSEPORT set on each socket.
535          */
536         if (reuseport && *reuseport &&
537                 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
538                 (socklen_t)sizeof(on)) < 0) {
539 #ifdef ENOPROTOOPT
540                 if(errno != ENOPROTOOPT || verbosity >= 3)
541                         log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
542                                 strerror(errno));
543 #endif
544                 /* this option is not essential, we can continue */
545                 *reuseport = 0;
546         }
547 #else
548         (void)reuseport;
549 #endif /* defined(SO_REUSEPORT) */
550 #if defined(IPV6_V6ONLY)
551         if(addr->ai_family == AF_INET6 && v6only) {
552                 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 
553                         (void*)&on, (socklen_t)sizeof(on)) < 0) {
554 #ifndef USE_WINSOCK
555                         log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
556                                 strerror(errno));
557                         close(s);
558 #else
559                         log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
560                                 wsa_strerror(WSAGetLastError()));
561                         closesocket(s);
562 #endif
563                         return -1;
564                 }
565         }
566 #else
567         (void)v6only;
568 #endif /* IPV6_V6ONLY */
569 #ifdef IP_TRANSPARENT
570         if (transparent &&
571             setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
572             (socklen_t)sizeof(on)) < 0) {
573                 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
574                         strerror(errno));
575         }
576 #endif /* IP_TRANSPARENT */
577         if(bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
578 #ifndef USE_WINSOCK
579                 /* detect freebsd jail with no ipv6 permission */
580                 if(addr->ai_family==AF_INET6 && errno==EINVAL)
581                         *noproto = 1;
582                 else {
583                         log_err_addr("can't bind socket", strerror(errno),
584                                 (struct sockaddr_storage*)addr->ai_addr,
585                                 addr->ai_addrlen);
586                 }
587                 close(s);
588 #else
589                 log_err_addr("can't bind socket", 
590                         wsa_strerror(WSAGetLastError()),
591                         (struct sockaddr_storage*)addr->ai_addr,
592                         addr->ai_addrlen);
593                 closesocket(s);
594 #endif
595                 return -1;
596         }
597         if(!fd_set_nonblock(s)) {
598 #ifndef USE_WINSOCK
599                 close(s);
600 #else
601                 closesocket(s);
602 #endif
603                 return -1;
604         }
605         if(listen(s, TCP_BACKLOG) == -1) {
606 #ifndef USE_WINSOCK
607                 log_err("can't listen: %s", strerror(errno));
608                 close(s);
609 #else
610                 log_err("can't listen: %s", wsa_strerror(WSAGetLastError()));
611                 closesocket(s);
612 #endif
613                 return -1;
614         }
615         return s;
616 }
617
/**
 * Create a listening, nonblocking unix-domain stream socket at path.
 * Any existing filesystem entry at path is unlinked before binding.
 * On every failure after the socket has been created the descriptor is
 * closed again, so no descriptor is leaked.
 * @param path: filesystem path of the socket. It is copied into
 *	sun_path with strlcpy, which truncates paths that do not fit.
 * @param noproto: set to 1 only when the build has no <sys/un.h>
 *	support; otherwise left untouched.
 * @return the listening socket descriptor, or -1 on failure.
 */
int
create_local_accept_sock(const char *path, int* noproto)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;

	verbose(VERB_ALGO, "creating unix socket %s", path);
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (socklen_t)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	(void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
	/* release the descriptor obtained from socket() above */
#ifndef USE_WINSOCK
	close(s);
#else
	closesocket(s);
#endif
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
673
674
/**
 * Resolve ifname/port with getaddrinfo and create a socket for the
 * first result: a UDP socket for SOCK_DGRAM, otherwise a TCP accept
 * socket.
 * @param stype: socket type; stored into hints->ai_socktype.
 * @param ifname: interface (node) name, may be NULL for the default.
 * @param port: service/port string.
 * @param hints: getaddrinfo hints; ai_socktype is overwritten.
 * @param v6only: passed on to the socket creation routine.
 * @param noip6: set to 0 on entry; set to 1 when the failure indicates
 *	that IPv6 is unavailable for an AF_INET6 request.
 * @param rcv: receive buffer size for UDP sockets (cast to int).
 * @param snd: send buffer size for UDP sockets (cast to int).
 * @param reuseport: passed on to the socket creation routine.
 * @param transparent: passed on to the socket creation routine.
 * @return socket descriptor or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port, 
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent)
{
	struct addrinfo *ai = NULL;
	int gai, fd, inuse, noproto;

	hints->ai_socktype = stype;
	*noip6 = 0;

	gai = getaddrinfo(ifname, port, hints, &ai);
	if(gai != 0 || ai == NULL) {
		const char* syserr = "";
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		/* EAI_SYSTEM means the real reason is in errno */
		if(gai == EAI_SYSTEM)
			syserr = (char*)strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s", 
			ifname?ifname:"default", port, gai_strerror(gai),
			syserr);
		return -1;
	}

	if(stype != SOCK_DGRAM) {
		fd = create_tcp_accept_sock(ai, v6only, &noproto, reuseport,
			transparent);
		if(fd == -1 && noproto && hints->ai_family == AF_INET6)
			*noip6 = 1;
	} else {
		verbose_print_addr(ai);
		fd = create_udp_sock(ai->ai_family, ai->ai_socktype,
			(struct sockaddr*)ai->ai_addr, ai->ai_addrlen,
			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
			reuseport, transparent);
		if(fd == -1 && inuse)
			log_err("bind: address already in use");
		else if(fd == -1 && noproto && hints->ai_family == AF_INET6)
			*noip6 = 1;
	}
	freeaddrinfo(ai);
	return fd;
}
725
/**
 * Make a socket, honouring an "ifspec@port" override inside ifname.
 * When ifname contains '@', the part before it is used as the interface
 * and the part after it replaces port; otherwise ifname and port are
 * used as given. Over-long parts are rejected with *noip6 set to 0.
 * @return socket descriptor or -1 on failure.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port, 
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent)
{
	char portbuf[16];
	char ifbuf[128];
	size_t iflen;
	const char* at = strchr(ifname, '@');

	if(at == NULL) {
		/* no override present */
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport, transparent);
	}

	iflen = (size_t)(at - ifname);
	if(iflen >= sizeof(ifbuf)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	if(strlen(at+1) >= sizeof(portbuf)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}

	/* copy, then cut the interface name off at the '@' */
	(void)strlcpy(ifbuf, ifname, sizeof(ifbuf));
	ifbuf[iflen] = 0;
	/* the length check above guarantees the port fits, terminator
	 * included */
	(void)strlcpy(portbuf, at+1, sizeof(portbuf));

	return make_sock(stype, ifbuf, portbuf, hints, v6only, noip6,
		rcv, snd, reuseport, transparent);
}
757
758 /**
759  * Add port to open ports list.
760  * @param list: list head. changed.
761  * @param s: fd.
762  * @param ftype: if fd is UDP.
763  * @return false on failure. list in unchanged then.
764  */
765 static int
766 port_insert(struct listen_port** list, int s, enum listen_type ftype)
767 {
768         struct listen_port* item = (struct listen_port*)malloc(
769                 sizeof(struct listen_port));
770         if(!item)
771                 return 0;
772         item->next = *list;
773         item->fd = s;
774         item->ftype = ftype;
775         *list = item;
776         return 1;
777 }
778
/**
 * Enable delivery of packet info (destination address) on the fd.
 * Which socket option is used depends on what the platform headers
 * define; if no suitable option exists for the family an error is logged.
 * @param s: fd.
 * @param family: AF_INET6 or AF_INET; other values are accepted as-is.
 * @return false if the option could not be set or is not available.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
	int enable = 1;
#else
	(void)s;
#endif
	switch(family) {
	case AF_INET6:
#if defined(IPV6_RECVPKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IPV6_PKTINFO)
		if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
			"disable interface-automatic in config");
		return 0;
#endif /* IPv6 packet info option */

	case AF_INET:
#if defined(IP_PKTINFO)
		if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
		if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
			(void*)&enable, (socklen_t)sizeof(enable)) < 0) {
			log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
				strerror(errno));
			return 0;
		}
		break;
#else
		log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
			"interface-automatic in config");
		return 0;
#endif /* IPv4 packet info option */

	default:
		/* other families: nothing to set */
		break;
	}
	return 1;
}
833
834 /**
835  * Helper for ports_open. Creates one interface (or NULL for default).
836  * @param ifname: The interface ip address.
837  * @param do_auto: use automatic interface detection.
838  *      If enabled, then ifname must be the wildcard name.
839  * @param do_udp: if udp should be used.
840  * @param do_tcp: if udp should be used.
841  * @param hints: for getaddrinfo. family and flags have to be set by caller.
842  * @param port: Port number to use (as string).
843  * @param list: list of open ports, appended to, changed to point to list head.
844  * @param rcv: receive buffer size for UDP
845  * @param snd: send buffer size for UDP
846  * @param ssl_port: ssl service port number
847  * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
848  *      set to false on exit if reuseport failed due to no kernel support.
849  * @param transparent: set IP_TRANSPARENT socket option.
850  * @return: returns false on error.
851  */
852 static int
853 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, 
854         struct addrinfo *hints, const char* port, struct listen_port** list,
855         size_t rcv, size_t snd, int ssl_port, int* reuseport, int transparent)
856 {
857         int s, noip6=0;
858         if(!do_udp && !do_tcp)
859                 return 0;
860         if(do_auto) {
861                 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 
862                         &noip6, rcv, snd, reuseport, transparent)) == -1) {
863                         if(noip6) {
864                                 log_warn("IPv6 protocol not available");
865                                 return 1;
866                         }
867                         return 0;
868                 }
869                 /* getting source addr packet info is highly non-portable */
870                 if(!set_recvpktinfo(s, hints->ai_family)) {
871 #ifndef USE_WINSOCK
872                         close(s);
873 #else
874                         closesocket(s);
875 #endif
876                         return 0;
877                 }
878                 if(!port_insert(list, s, listen_type_udpancil)) {
879 #ifndef USE_WINSOCK
880                         close(s);
881 #else
882                         closesocket(s);
883 #endif
884                         return 0;
885                 }
886         } else if(do_udp) {
887                 /* regular udp socket */
888                 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 
889                         &noip6, rcv, snd, reuseport, transparent)) == -1) {
890                         if(noip6) {
891                                 log_warn("IPv6 protocol not available");
892                                 return 1;
893                         }
894                         return 0;
895                 }
896                 if(!port_insert(list, s, listen_type_udp)) {
897 #ifndef USE_WINSOCK
898                         close(s);
899 #else
900                         closesocket(s);
901 #endif
902                         return 0;
903                 }
904         }
905         if(do_tcp) {
906                 int is_ssl = ((strchr(ifname, '@') && 
907                         atoi(strchr(ifname, '@')+1) == ssl_port) ||
908                         (!strchr(ifname, '@') && atoi(port) == ssl_port));
909                 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, 
910                         &noip6, 0, 0, reuseport, transparent)) == -1) {
911                         if(noip6) {
912                                 /*log_warn("IPv6 protocol not available");*/
913                                 return 1;
914                         }
915                         return 0;
916                 }
917                 if(is_ssl)
918                         verbose(VERB_ALGO, "setup TCP for SSL service");
919                 if(!port_insert(list, s, is_ssl?listen_type_ssl:
920                         listen_type_tcp)) {
921 #ifndef USE_WINSOCK
922                         close(s);
923 #else
924                         closesocket(s);
925 #endif
926                         return 0;
927                 }
928         }
929         return 1;
930 }
931
932 /** 
933  * Add items to commpoint list in front.
934  * @param c: commpoint to add.
935  * @param front: listen struct.
936  * @return: false on failure.
937  */
938 static int
939 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
940 {
941         struct listen_list* item = (struct listen_list*)malloc(
942                 sizeof(struct listen_list));
943         if(!item)
944                 return 0;
945         item->com = c;
946         item->next = front->cps;
947         front->cps = item;
948         return 1;
949 }
950
951 struct listen_dnsport* 
952 listen_create(struct comm_base* base, struct listen_port* ports,
953         size_t bufsize, int tcp_accept_count, void* sslctx,
954         struct dt_env* dtenv, comm_point_callback_t* cb, void *cb_arg)
955 {
956         struct listen_dnsport* front = (struct listen_dnsport*)
957                 malloc(sizeof(struct listen_dnsport));
958         if(!front)
959                 return NULL;
960         front->cps = NULL;
961         front->udp_buff = sldns_buffer_new(bufsize);
962         if(!front->udp_buff) {
963                 free(front);
964                 return NULL;
965         }
966
967         /* create comm points as needed */
968         while(ports) {
969                 struct comm_point* cp = NULL;
970                 if(ports->ftype == listen_type_udp) 
971                         cp = comm_point_create_udp(base, ports->fd, 
972                                 front->udp_buff, cb, cb_arg);
973                 else if(ports->ftype == listen_type_tcp)
974                         cp = comm_point_create_tcp(base, ports->fd, 
975                                 tcp_accept_count, bufsize, cb, cb_arg);
976                 else if(ports->ftype == listen_type_ssl) {
977                         cp = comm_point_create_tcp(base, ports->fd, 
978                                 tcp_accept_count, bufsize, cb, cb_arg);
979                         cp->ssl = sslctx;
980                 } else if(ports->ftype == listen_type_udpancil) 
981                         cp = comm_point_create_udp_ancil(base, ports->fd, 
982                                 front->udp_buff, cb, cb_arg);
983                 if(!cp) {
984                         log_err("can't create commpoint");      
985                         listen_delete(front);
986                         return NULL;
987                 }
988                 cp->dtenv = dtenv;
989                 cp->do_not_close = 1;
990                 if(!listen_cp_insert(cp, front)) {
991                         log_err("malloc failed");
992                         comm_point_delete(cp);
993                         listen_delete(front);
994                         return NULL;
995                 }
996                 ports = ports->next;
997         }
998         if(!front->cps) {
999                 log_err("Could not open sockets to accept queries.");
1000                 listen_delete(front);
1001                 return NULL;
1002         }
1003
1004         return front;
1005 }
1006
1007 void
1008 listen_list_delete(struct listen_list* list)
1009 {
1010         struct listen_list *p = list, *pn;
1011         while(p) {
1012                 pn = p->next;
1013                 comm_point_delete(p->com);
1014                 free(p);
1015                 p = pn;
1016         }
1017 }
1018
1019 void 
1020 listen_delete(struct listen_dnsport* front)
1021 {
1022         if(!front) 
1023                 return;
1024         listen_list_delete(front->cps);
1025         sldns_buffer_free(front->udp_buff);
1026         free(front);
1027 }
1028
1029 struct listen_port* 
1030 listening_ports_open(struct config_file* cfg, int* reuseport)
1031 {
1032         struct listen_port* list = NULL;
1033         struct addrinfo hints;
1034         int i, do_ip4, do_ip6;
1035         int do_tcp, do_auto;
1036         char portbuf[32];
1037         snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1038         do_ip4 = cfg->do_ip4;
1039         do_ip6 = cfg->do_ip6;
1040         do_tcp = cfg->do_tcp;
1041         do_auto = cfg->if_automatic && cfg->do_udp;
1042         if(cfg->incoming_num_tcp == 0)
1043                 do_tcp = 0;
1044
1045         /* getaddrinfo */
1046         memset(&hints, 0, sizeof(hints));
1047         hints.ai_flags = AI_PASSIVE;
1048         /* no name lookups on our listening ports */
1049         if(cfg->num_ifs > 0)
1050                 hints.ai_flags |= AI_NUMERICHOST;
1051         hints.ai_family = AF_UNSPEC;
1052 #ifndef INET6
1053         do_ip6 = 0;
1054 #endif
1055         if(!do_ip4 && !do_ip6) {
1056                 return NULL;
1057         }
1058         /* create ip4 and ip6 ports so that return addresses are nice. */
1059         if(do_auto || cfg->num_ifs == 0) {
1060                 if(do_ip6) {
1061                         hints.ai_family = AF_INET6;
1062                         if(!ports_create_if(do_auto?"::0":"::1", 
1063                                 do_auto, cfg->do_udp, do_tcp, 
1064                                 &hints, portbuf, &list,
1065                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1066                                 cfg->ssl_port, reuseport,
1067                                 cfg->ip_transparent)) {
1068                                 listening_ports_free(list);
1069                                 return NULL;
1070                         }
1071                 }
1072                 if(do_ip4) {
1073                         hints.ai_family = AF_INET;
1074                         if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1", 
1075                                 do_auto, cfg->do_udp, do_tcp, 
1076                                 &hints, portbuf, &list,
1077                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1078                                 cfg->ssl_port, reuseport,
1079                                 cfg->ip_transparent)) {
1080                                 listening_ports_free(list);
1081                                 return NULL;
1082                         }
1083                 }
1084         } else for(i = 0; i<cfg->num_ifs; i++) {
1085                 if(str_is_ip6(cfg->ifs[i])) {
1086                         if(!do_ip6)
1087                                 continue;
1088                         hints.ai_family = AF_INET6;
1089                         if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp, 
1090                                 do_tcp, &hints, portbuf, &list, 
1091                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1092                                 cfg->ssl_port, reuseport,
1093                                 cfg->ip_transparent)) {
1094                                 listening_ports_free(list);
1095                                 return NULL;
1096                         }
1097                 } else {
1098                         if(!do_ip4)
1099                                 continue;
1100                         hints.ai_family = AF_INET;
1101                         if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp, 
1102                                 do_tcp, &hints, portbuf, &list, 
1103                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1104                                 cfg->ssl_port, reuseport,
1105                                 cfg->ip_transparent)) {
1106                                 listening_ports_free(list);
1107                                 return NULL;
1108                         }
1109                 }
1110         }
1111         return list;
1112 }
1113
1114 void listening_ports_free(struct listen_port* list)
1115 {
1116         struct listen_port* nx;
1117         while(list) {
1118                 nx = list->next;
1119                 if(list->fd != -1) {
1120 #ifndef USE_WINSOCK
1121                         close(list->fd);
1122 #else
1123                         closesocket(list->fd);
1124 #endif
1125                 }
1126                 free(list);
1127                 list = nx;
1128         }
1129 }
1130
1131 size_t listen_get_mem(struct listen_dnsport* listen)
1132 {
1133         size_t s = sizeof(*listen) + sizeof(*listen->base) + 
1134                 sizeof(*listen->udp_buff) + 
1135                 sldns_buffer_capacity(listen->udp_buff);
1136         struct listen_list* p;
1137         for(p = listen->cps; p; p = p->next) {
1138                 s += sizeof(*p);
1139                 s += comm_point_get_mem(p->com);
1140         }
1141         return s;
1142 }
1143
1144 void listen_stop_accept(struct listen_dnsport* listen)
1145 {
1146         /* do not stop the ones that have no tcp_free list
1147          * (they have already stopped listening) */
1148         struct listen_list* p;
1149         for(p=listen->cps; p; p=p->next) {
1150                 if(p->com->type == comm_tcp_accept &&
1151                         p->com->tcp_free != NULL) {
1152                         comm_point_stop_listening(p->com);
1153                 }
1154         }
1155 }
1156
1157 void listen_start_accept(struct listen_dnsport* listen)
1158 {
1159         /* do not start the ones that have no tcp_free list, it is no
1160          * use to listen to them because they have no free tcp handlers */
1161         struct listen_list* p;
1162         for(p=listen->cps; p; p=p->next) {
1163                 if(p->com->type == comm_tcp_accept &&
1164                         p->com->tcp_free != NULL) {
1165                         comm_point_start_listening(p->com, -1, -1);
1166                 }
1167         }
1168 }
1169